Clean up decode_code_page_stateful() and encode_code_page()

* Fix decode_code_page_errors() result: return the consumed size instead of 0 on success
* Inline decode_code_page() and encode_code_page_chunk()
* Replace the PyUnicodeObject type by PyObject
This commit is contained in:
Victor Stinner 2011-11-04 00:05:13 +01:00
parent a9e73640bf
commit 76a31a6bff
2 changed files with 78 additions and 111 deletions

View file

@ -1980,6 +1980,9 @@ def test_multibyte_encoding(self):
)) ))
def test_incremental(self): def test_incremental(self):
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
self.assertEqual(decoded, ('', 0))
decoded = codecs.code_page_decode(932, decoded = codecs.code_page_decode(932,
b'\xe9\x80\xe9', 'strict', b'\xe9\x80\xe9', 'strict',
False) False)

View file

@ -7006,7 +7006,7 @@ decode_code_page_flags(UINT code_page)
*/ */
static int static int
decode_code_page_strict(UINT code_page, decode_code_page_strict(UINT code_page,
PyUnicodeObject **v, PyObject **v,
const char *in, const char *in,
int insize) int insize)
{ {
@ -7022,7 +7022,7 @@ decode_code_page_strict(UINT code_page,
if (*v == NULL) { if (*v == NULL) {
/* Create unicode object */ /* Create unicode object */
*v = _PyUnicode_New(outsize); *v = (PyObject*)_PyUnicode_New(outsize);
if (*v == NULL) if (*v == NULL)
return -1; return -1;
out = PyUnicode_AS_UNICODE(*v); out = PyUnicode_AS_UNICODE(*v);
@ -7030,7 +7030,7 @@ decode_code_page_strict(UINT code_page,
else { else {
/* Extend unicode object */ /* Extend unicode object */
Py_ssize_t n = PyUnicode_GET_SIZE(*v); Py_ssize_t n = PyUnicode_GET_SIZE(*v);
if (PyUnicode_Resize((PyObject**)v, n + outsize) < 0) if (PyUnicode_Resize(v, n + outsize) < 0)
return -1; return -1;
out = PyUnicode_AS_UNICODE(*v) + n; out = PyUnicode_AS_UNICODE(*v) + n;
} }
@ -7057,9 +7057,8 @@ decode_code_page_strict(UINT code_page,
*/ */
static int static int
decode_code_page_errors(UINT code_page, decode_code_page_errors(UINT code_page,
PyUnicodeObject **v, PyObject **v,
const char *in, const char *in, const int size,
int size,
const char *errors) const char *errors)
{ {
const char *startin = in; const char *startin = in;
@ -7103,7 +7102,7 @@ decode_code_page_errors(UINT code_page,
PyErr_NoMemory(); PyErr_NoMemory();
goto error; goto error;
} }
*v = _PyUnicode_New(size * Py_ARRAY_LENGTH(buffer)); *v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
if (*v == NULL) if (*v == NULL)
goto error; goto error;
startout = PyUnicode_AS_UNICODE(*v); startout = PyUnicode_AS_UNICODE(*v);
@ -7115,7 +7114,7 @@ decode_code_page_errors(UINT code_page,
PyErr_NoMemory(); PyErr_NoMemory();
goto error; goto error;
} }
if (PyUnicode_Resize((PyObject**)v, n + size * Py_ARRAY_LENGTH(buffer)) < 0) if (PyUnicode_Resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0)
goto error; goto error;
startout = PyUnicode_AS_UNICODE(*v) + n; startout = PyUnicode_AS_UNICODE(*v) + n;
} }
@ -7173,9 +7172,9 @@ decode_code_page_errors(UINT code_page,
/* Extend unicode object */ /* Extend unicode object */
outsize = out - startout; outsize = out - startout;
assert(outsize <= PyUnicode_WSTR_LENGTH(*v)); assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
if (PyUnicode_Resize((PyObject**)v, outsize) < 0) if (PyUnicode_Resize(v, outsize) < 0)
goto error; goto error;
ret = 0; ret = size;
error: error:
Py_XDECREF(encoding_obj); Py_XDECREF(encoding_obj);
@ -7184,50 +7183,13 @@ decode_code_page_errors(UINT code_page,
return ret; return ret;
} }
/*
* Decode a byte string from a Windows code page into a unicode object. If
* 'final' is set, converts a trailing lead-byte too; otherwise a trailing
* DBCS lead byte is dropped from the input before decoding.
*
* The result is stored through 'v'. For empty input, *v is set to the
* shared empty string if it was NULL and is left untouched otherwise.
*
* Returns the consumed size on success, or raises a WindowsError or
* UnicodeDecodeError exception and returns -1 on error.
*/
static int
decode_code_page(UINT code_page,
PyUnicodeObject **v,
const char *s, int size,
int final, const char *errors)
{
int done;
/* Empty input: nothing to decode, just make sure *v holds a string */
if (size == 0) {
if (*v == NULL) {
Py_INCREF(unicode_empty);
*v = (PyUnicodeObject*)unicode_empty;
/* NOTE(review): this check runs after Py_INCREF(unicode_empty), so it
   cannot guard the INCREF; presumably unicode_empty is never NULL
   here -- confirm */
if (*v == NULL)
return -1;
}
return 0;
}
/* Skip trailing lead-byte unless 'final' is set */
if (!final && is_dbcs_lead_byte(code_page, s, size - 1))
--size;
/* Try the strict conversion first; a result of -2 hands the same input
   over to the error-handler based decoder (presumably -2 means the input
   could not be decoded strictly -- confirm against
   decode_code_page_strict) */
done = decode_code_page_strict(code_page, v, s, size);
if (done == -2)
done = decode_code_page_errors(code_page, v, s, size, errors);
return done;
}
static PyObject * static PyObject *
decode_code_page_stateful(int code_page, decode_code_page_stateful(int code_page,
const char *s, const char *s, Py_ssize_t size,
Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
const char *errors,
Py_ssize_t *consumed)
{ {
PyUnicodeObject *v = NULL; PyObject *v = NULL;
int done; int chunk_size, final, converted, done;
if (code_page < 0) { if (code_page < 0) {
PyErr_SetString(PyExc_ValueError, "invalid code page number"); PyErr_SetString(PyExc_ValueError, "invalid code page number");
@ -7237,29 +7199,53 @@ decode_code_page_stateful(int code_page,
if (consumed) if (consumed)
*consumed = 0; *consumed = 0;
do
{
#ifdef NEED_RETRY #ifdef NEED_RETRY
retry: if (size > INT_MAX) {
if (size > INT_MAX) chunk_size = INT_MAX;
done = decode_code_page(code_page, &v, s, INT_MAX, 0, errors); final = 0;
done = 0;
}
else else
#endif #endif
done = decode_code_page(code_page, &v, s, (int)size, !consumed, errors); {
chunk_size = (int)size;
final = (consumed == NULL);
done = 1;
}
if (done < 0) { /* Skip trailing lead-byte unless 'final' is set */
if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1))
--chunk_size;
if (chunk_size == 0 && done) {
if (v != NULL)
break;
Py_INCREF(unicode_empty);
return unicode_empty;
}
converted = decode_code_page_strict(code_page, &v,
s, chunk_size);
if (converted == -2)
converted = decode_code_page_errors(code_page, &v,
s, chunk_size,
errors);
assert(converted != 0);
if (converted < 0) {
Py_XDECREF(v); Py_XDECREF(v);
return NULL; return NULL;
} }
if (consumed) if (consumed)
*consumed += done; *consumed += converted;
#ifdef NEED_RETRY s += converted;
if (size > INT_MAX) { size -= converted;
s += done; } while (!done);
size -= done;
goto retry;
}
#endif
#ifndef DONT_MAKE_RESULT_READY #ifndef DONT_MAKE_RESULT_READY
if (_PyUnicode_READY_REPLACE(&v)) { if (_PyUnicode_READY_REPLACE(&v)) {
@ -7268,7 +7254,7 @@ decode_code_page_stateful(int code_page,
} }
#endif #endif
assert(_PyUnicode_CheckConsistency(v, 1)); assert(_PyUnicode_CheckConsistency(v, 1));
return (PyObject *)v; return v;
} }
PyObject * PyObject *
@ -7583,40 +7569,6 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
return ret; return ret;
} }
/*
* Encode one chunk of a Unicode string to a Windows code page, storing the
* result through 'outbytes'.
*
* 'p' and 'size' describe the chunk to encode; 'unicode' and
* 'unicode_offset' are only forwarded to encode_code_page_errors().
* For an empty chunk, *outbytes is set to a new empty bytes object if it
* was NULL and is left untouched otherwise.
*
* Returns the number of consumed characters on success, or raises a
* WindowsError and returns -1 on other errors.
*/
static int
encode_code_page_chunk(UINT code_page, PyObject **outbytes,
PyObject *unicode, Py_ssize_t unicode_offset,
const Py_UNICODE *p, int size,
const char* errors)
{
int done;
/* Empty chunk: nothing to encode, just make sure *outbytes exists */
if (size == 0) {
if (*outbytes == NULL) {
*outbytes = PyBytes_FromStringAndSize(NULL, 0);
if (*outbytes == NULL)
return -1;
}
return 0;
}
/* Try the strict conversion first; a result of -2 hands the same chunk
   over to the error-handler based encoder (presumably -2 means some
   character could not be encoded strictly -- confirm against
   encode_code_page_strict) */
done = encode_code_page_strict(code_page, outbytes,
p, size,
errors);
if (done == -2)
done = encode_code_page_errors(code_page, outbytes,
unicode, unicode_offset,
p, size,
errors);
return done;
}
static PyObject * static PyObject *
encode_code_page(int code_page, encode_code_page(int code_page,
PyObject *unicode, PyObject *unicode,
@ -7626,7 +7578,7 @@ encode_code_page(int code_page,
Py_ssize_t size; Py_ssize_t size;
PyObject *outbytes = NULL; PyObject *outbytes = NULL;
Py_ssize_t offset; Py_ssize_t offset;
int chunk_len, ret; int chunk_len, ret, done;
p = PyUnicode_AsUnicodeAndSize(unicode, &size); p = PyUnicode_AsUnicodeAndSize(unicode, &size);
if (p == NULL) if (p == NULL)
@ -7637,20 +7589,32 @@ encode_code_page(int code_page,
return NULL; return NULL;
} }
if (size == 0)
return PyBytes_FromStringAndSize(NULL, 0);
offset = 0; offset = 0;
do do
{ {
#ifdef NEED_RETRY #ifdef NEED_RETRY
if (size > INT_MAX) if (size > INT_MAX) {
chunk_len = INT_MAX; chunk_len = INT_MAX;
done = 0;
}
else else
#endif #endif
{
chunk_len = (int)size; chunk_len = (int)size;
ret = encode_code_page_chunk(code_page, &outbytes, done = 1;
}
ret = encode_code_page_strict(code_page, &outbytes,
p, chunk_len,
errors);
if (ret == -2)
ret = encode_code_page_errors(code_page, &outbytes,
unicode, offset, unicode, offset,
p, chunk_len, p, chunk_len,
errors); errors);
if (ret < 0) { if (ret < 0) {
Py_XDECREF(outbytes); Py_XDECREF(outbytes);
return NULL; return NULL;
@ -7659,7 +7623,7 @@ encode_code_page(int code_page,
p += chunk_len; p += chunk_len;
offset += chunk_len; offset += chunk_len;
size -= chunk_len; size -= chunk_len;
} while (size != 0); } while (!done);
return outbytes; return outbytes;
} }