mirror of
https://github.com/python/cpython.git
synced 2025-11-01 14:11:41 +00:00
Marc-Andre Lemburg:
* New exported API PyUnicode_Resize(). * The experimental Keep-Alive optimization was turned back on after some tweaks to the implementation. It should now work without causing core dumps... this has yet to be tested though (switching it off is easy: see the unicodeobject.c file for details). * Fixed a memory leak in the Unicode freelist cleanup code. * Added tests to correctly process the return code from _PyUnicode_Resize(). * Fixed a bug in the 'ignore' error handling routines of some builtin codecs. Added test cases for these to test_unicode.py.
This commit is contained in:
parent
90daa87569
commit
fd4b957b06
1 changed file with 79 additions and 22 deletions
|
|
@ -76,6 +76,7 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
|
||||||
#ifdef MS_WIN32
|
#ifdef MS_WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Limit for the Unicode object free list */
|
/* Limit for the Unicode object free list */
|
||||||
|
|
||||||
#define MAX_UNICODE_FREELIST_SIZE 1024
|
#define MAX_UNICODE_FREELIST_SIZE 1024
|
||||||
|
|
@ -87,18 +88,17 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
|
||||||
limit. This reduces malloc() overhead for small Unicode objects.
|
limit. This reduces malloc() overhead for small Unicode objects.
|
||||||
|
|
||||||
At worst this will result in MAX_UNICODE_FREELIST_SIZE *
|
At worst this will result in MAX_UNICODE_FREELIST_SIZE *
|
||||||
(sizeof(PyUnicodeObject) + STAYALIVE_SIZE_LIMIT +
|
(sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
|
||||||
malloc()-overhead) bytes of unused garbage.
|
malloc()-overhead) bytes of unused garbage.
|
||||||
|
|
||||||
Setting the limit to 0 effectively turns the feature off.
|
Setting the limit to 0 effectively turns the feature off.
|
||||||
|
|
||||||
XXX The feature is currently turned off because there are
|
Note: This is an experimental feature ! If you get core dumps when
|
||||||
apparently some lingering bugs in its implementation which I
|
using Unicode objects, turn this feature off.
|
||||||
haven't yet been able to sort out.
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define STAYALIVE_SIZE_LIMIT 0
|
#define KEEPALIVE_SIZE_LIMIT 9
|
||||||
|
|
||||||
/* Endianness switches; defaults to little endian */
|
/* Endianness switches; defaults to little endian */
|
||||||
|
|
||||||
|
|
@ -125,9 +125,9 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode,
|
||||||
{
|
{
|
||||||
void *oldstr;
|
void *oldstr;
|
||||||
|
|
||||||
/* Shortcut if there's nothing to do. */
|
/* Shortcut if there's nothing much to do. */
|
||||||
if (unicode->length == length)
|
if (unicode->length == length)
|
||||||
return 0;
|
goto reset;
|
||||||
|
|
||||||
/* Resizing unicode_empty is not allowed. */
|
/* Resizing unicode_empty is not allowed. */
|
||||||
if (unicode == unicode_empty) {
|
if (unicode == unicode_empty) {
|
||||||
|
|
@ -148,6 +148,7 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode,
|
||||||
unicode->str[length] = 0;
|
unicode->str[length] = 0;
|
||||||
unicode->length = length;
|
unicode->length = length;
|
||||||
|
|
||||||
|
reset:
|
||||||
/* Reset the object caches */
|
/* Reset the object caches */
|
||||||
if (unicode->utf8str) {
|
if (unicode->utf8str) {
|
||||||
Py_DECREF(unicode->utf8str);
|
Py_DECREF(unicode->utf8str);
|
||||||
|
|
@ -158,6 +159,23 @@ int _PyUnicode_Resize(register PyUnicodeObject *unicode,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int PyUnicode_Resize(PyObject **unicode,
|
||||||
|
int length)
|
||||||
|
{
|
||||||
|
PyUnicodeObject *v;
|
||||||
|
|
||||||
|
if (unicode == NULL) {
|
||||||
|
PyErr_BadInternalCall();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
v = (PyUnicodeObject *)*unicode;
|
||||||
|
if (v == NULL || !PyUnicode_Check(v) || v->ob_refcnt != 1) {
|
||||||
|
PyErr_BadInternalCall();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return _PyUnicode_Resize(v, length);
|
||||||
|
}
|
||||||
|
|
||||||
/* We allocate one more byte to make sure the string is
|
/* We allocate one more byte to make sure the string is
|
||||||
Ux0000 terminated -- XXX is this needed ?
|
Ux0000 terminated -- XXX is this needed ?
|
||||||
|
|
||||||
|
|
@ -185,7 +203,9 @@ PyUnicodeObject *_PyUnicode_New(int length)
|
||||||
unicode->ob_type = &PyUnicode_Type;
|
unicode->ob_type = &PyUnicode_Type;
|
||||||
_Py_NewReference((PyObject *)unicode);
|
_Py_NewReference((PyObject *)unicode);
|
||||||
if (unicode->str) {
|
if (unicode->str) {
|
||||||
if (unicode->length < length &&
|
/* Keep-Alive optimization: we only upsize the buffer,
|
||||||
|
never downsize it. */
|
||||||
|
if ((unicode->length < length) &&
|
||||||
_PyUnicode_Resize(unicode, length)) {
|
_PyUnicode_Resize(unicode, length)) {
|
||||||
free(unicode->str);
|
free(unicode->str);
|
||||||
PyMem_DEL(unicode);
|
PyMem_DEL(unicode);
|
||||||
|
|
@ -220,19 +240,25 @@ PyUnicodeObject *_PyUnicode_New(int length)
|
||||||
static
|
static
|
||||||
void _PyUnicode_Free(register PyUnicodeObject *unicode)
|
void _PyUnicode_Free(register PyUnicodeObject *unicode)
|
||||||
{
|
{
|
||||||
Py_XDECREF(unicode->utf8str);
|
|
||||||
if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
|
if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
|
||||||
if (unicode->length >= STAYALIVE_SIZE_LIMIT) {
|
/* Keep-Alive optimization */
|
||||||
|
if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
|
||||||
free(unicode->str);
|
free(unicode->str);
|
||||||
unicode->str = NULL;
|
unicode->str = NULL;
|
||||||
unicode->length = 0;
|
unicode->length = 0;
|
||||||
}
|
}
|
||||||
|
if (unicode->utf8str) {
|
||||||
|
Py_DECREF(unicode->utf8str);
|
||||||
|
unicode->utf8str = NULL;
|
||||||
|
}
|
||||||
|
/* Add to free list */
|
||||||
*(PyUnicodeObject **)unicode = unicode_freelist;
|
*(PyUnicodeObject **)unicode = unicode_freelist;
|
||||||
unicode_freelist = unicode;
|
unicode_freelist = unicode;
|
||||||
unicode_freelist_size++;
|
unicode_freelist_size++;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
free(unicode->str);
|
free(unicode->str);
|
||||||
|
Py_XDECREF(unicode->utf8str);
|
||||||
PyMem_DEL(unicode);
|
PyMem_DEL(unicode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -665,7 +691,8 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
_PyString_Resize(&v, p - q);
|
if (_PyString_Resize(&v, p - q))
|
||||||
|
goto onError;
|
||||||
|
|
||||||
done:
|
done:
|
||||||
return v;
|
return v;
|
||||||
|
|
@ -1047,7 +1074,8 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_PyUnicode_Resize(v, (int)(p - buf));
|
if (_PyUnicode_Resize(v, (int)(p - buf)))
|
||||||
|
goto onError;
|
||||||
return (PyObject *)v;
|
return (PyObject *)v;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
@ -1119,9 +1147,14 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
|
||||||
*p++ = q[1];
|
*p++ = q[1];
|
||||||
|
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
_PyString_Resize(&repr, p - q);
|
if (_PyString_Resize(&repr, p - q))
|
||||||
|
goto onError;
|
||||||
|
|
||||||
return repr;
|
return repr;
|
||||||
|
|
||||||
|
onError:
|
||||||
|
Py_DECREF(repr);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
|
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
|
||||||
|
|
@ -1209,7 +1242,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
s += i;
|
s += i;
|
||||||
*p++ = x;
|
*p++ = x;
|
||||||
}
|
}
|
||||||
_PyUnicode_Resize(v, (int)(p - buf));
|
if (_PyUnicode_Resize(v, (int)(p - buf)))
|
||||||
|
goto onError;
|
||||||
return (PyObject *)v;
|
return (PyObject *)v;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
@ -1247,9 +1281,14 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
||||||
*p++ = (char) ch;
|
*p++ = (char) ch;
|
||||||
}
|
}
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
_PyString_Resize(&repr, p - q);
|
if (_PyString_Resize(&repr, p - q))
|
||||||
|
goto onError;
|
||||||
|
|
||||||
return repr;
|
return repr;
|
||||||
|
|
||||||
|
onError:
|
||||||
|
Py_DECREF(repr);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
|
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
|
||||||
|
|
@ -1305,6 +1344,7 @@ int latin1_encoding_error(const Py_UNICODE **source,
|
||||||
}
|
}
|
||||||
else if (strcmp(errors,"replace") == 0) {
|
else if (strcmp(errors,"replace") == 0) {
|
||||||
**dest = '?';
|
**dest = '?';
|
||||||
|
(*dest)++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -1321,12 +1361,13 @@ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
PyObject *repr;
|
PyObject *repr;
|
||||||
char *s;
|
char *s, *start;
|
||||||
repr = PyString_FromStringAndSize(NULL, size);
|
repr = PyString_FromStringAndSize(NULL, size);
|
||||||
if (repr == NULL)
|
if (repr == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
s = PyString_AS_STRING(repr);
|
s = PyString_AS_STRING(repr);
|
||||||
|
start = s;
|
||||||
while (size-- > 0) {
|
while (size-- > 0) {
|
||||||
Py_UNICODE ch = *p++;
|
Py_UNICODE ch = *p++;
|
||||||
if (ch >= 256) {
|
if (ch >= 256) {
|
||||||
|
|
@ -1337,6 +1378,10 @@ PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
||||||
else
|
else
|
||||||
*s++ = (char)ch;
|
*s++ = (char)ch;
|
||||||
}
|
}
|
||||||
|
/* Resize if error handling skipped some characters */
|
||||||
|
if (s - start < PyString_GET_SIZE(repr))
|
||||||
|
if (_PyString_Resize(&repr, s - start))
|
||||||
|
goto onError;
|
||||||
return repr;
|
return repr;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
@ -1411,8 +1456,9 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
|
||||||
"ordinal not in range(128)"))
|
"ordinal not in range(128)"))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
if (p - PyUnicode_AS_UNICODE(v) < size)
|
if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
|
||||||
_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)));
|
if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))))
|
||||||
|
goto onError;
|
||||||
return (PyObject *)v;
|
return (PyObject *)v;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
@ -1438,6 +1484,7 @@ int ascii_encoding_error(const Py_UNICODE **source,
|
||||||
}
|
}
|
||||||
else if (strcmp(errors,"replace") == 0) {
|
else if (strcmp(errors,"replace") == 0) {
|
||||||
**dest = '?';
|
**dest = '?';
|
||||||
|
(*dest)++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -1454,12 +1501,13 @@ PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
PyObject *repr;
|
PyObject *repr;
|
||||||
char *s;
|
char *s, *start;
|
||||||
repr = PyString_FromStringAndSize(NULL, size);
|
repr = PyString_FromStringAndSize(NULL, size);
|
||||||
if (repr == NULL)
|
if (repr == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
s = PyString_AS_STRING(repr);
|
s = PyString_AS_STRING(repr);
|
||||||
|
start = s;
|
||||||
while (size-- > 0) {
|
while (size-- > 0) {
|
||||||
Py_UNICODE ch = *p++;
|
Py_UNICODE ch = *p++;
|
||||||
if (ch >= 128) {
|
if (ch >= 128) {
|
||||||
|
|
@ -1470,6 +1518,10 @@ PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
|
||||||
else
|
else
|
||||||
*s++ = (char)ch;
|
*s++ = (char)ch;
|
||||||
}
|
}
|
||||||
|
/* Resize if error handling skipped some characters */
|
||||||
|
if (s - start < PyString_GET_SIZE(repr))
|
||||||
|
if (_PyString_Resize(&repr, s - start))
|
||||||
|
goto onError;
|
||||||
return repr;
|
return repr;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
@ -1898,7 +1950,8 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *s,
|
||||||
Py_DECREF(x);
|
Py_DECREF(x);
|
||||||
}
|
}
|
||||||
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
|
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
|
||||||
_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)));
|
if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))))
|
||||||
|
goto onError;
|
||||||
|
|
||||||
done:
|
done:
|
||||||
return (PyObject *)v;
|
return (PyObject *)v;
|
||||||
|
|
@ -1959,7 +2012,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (0 < ch && ch < 256) {
|
if (0 < ch && ch < 256) {
|
||||||
*output++ = (char) ch;
|
*output++ = ch;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* All other characters are considered invalid */
|
/* All other characters are considered invalid */
|
||||||
|
|
@ -4539,7 +4592,8 @@ PyObject *PyUnicode_Format(PyObject *format,
|
||||||
Py_DECREF(args);
|
Py_DECREF(args);
|
||||||
}
|
}
|
||||||
Py_DECREF(uformat);
|
Py_DECREF(uformat);
|
||||||
_PyUnicode_Resize(result, reslen - rescnt);
|
if (_PyUnicode_Resize(result, reslen - rescnt))
|
||||||
|
goto onError;
|
||||||
return (PyObject *)result;
|
return (PyObject *)result;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
@ -4605,6 +4659,9 @@ _PyUnicode_Fini()
|
||||||
while (u != NULL) {
|
while (u != NULL) {
|
||||||
PyUnicodeObject *v = u;
|
PyUnicodeObject *v = u;
|
||||||
u = *(PyUnicodeObject **)u;
|
u = *(PyUnicodeObject **)u;
|
||||||
|
if (v->str)
|
||||||
|
free(v->str);
|
||||||
|
Py_XDECREF(v->utf8str);
|
||||||
free(v);
|
free(v);
|
||||||
}
|
}
|
||||||
Py_XDECREF(unicode_empty);
|
Py_XDECREF(unicode_empty);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue