mirror of
https://github.com/msgpack/msgpack-python.git
synced 2025-10-19 20:03:16 +00:00
packer: Use PyUnicode_AsUTF8AndSize() for utf-8 (#272)
This commit is contained in:
parent
5534d0c7af
commit
60ef3879d7
3 changed files with 71 additions and 14 deletions
|
@ -9,6 +9,6 @@ for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do
|
|||
pushd test # prevent importing msgpack package in current directory.
|
||||
$PYBIN/python -c 'import sys; print(hex(sys.maxsize))'
|
||||
$PYBIN/python -c 'from msgpack import _packer, _unpacker'
|
||||
$PYBIN/py.test -v
|
||||
$PYBIN/pytest -v .
|
||||
popd
|
||||
done
|
||||
|
|
|
@ -13,6 +13,7 @@ cdef extern from "Python.h":
|
|||
int PyMemoryView_Check(object obj)
|
||||
int PyByteArray_Check(object obj)
|
||||
int PyByteArray_CheckExact(object obj)
|
||||
char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL
|
||||
|
||||
|
||||
cdef extern from "pack.h":
|
||||
|
@ -37,6 +38,7 @@ cdef extern from "pack.h":
|
|||
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
|
||||
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
|
||||
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
|
||||
int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
|
||||
|
||||
cdef int DEFAULT_RECURSE_LIMIT=511
|
||||
cdef long long ITEM_LIMIT = (2**32)-1
|
||||
|
@ -126,8 +128,12 @@ cdef class Packer(object):
|
|||
raise TypeError("default must be a callable.")
|
||||
self._default = default
|
||||
if encoding is None:
|
||||
self.encoding = 'utf_8'
|
||||
self.unicode_errors = NULL
|
||||
if unicode_errors is None:
|
||||
self.encoding = NULL
|
||||
self.unicode_errors = NULL
|
||||
else:
|
||||
self.encoding = "utf_8"
|
||||
self.unicode_errors = unicode_errors
|
||||
else:
|
||||
if isinstance(encoding, unicode):
|
||||
self._bencoding = encoding.encode('ascii')
|
||||
|
@ -140,6 +146,8 @@ cdef class Packer(object):
|
|||
self._berrors = unicode_errors
|
||||
if self._berrors is not None:
|
||||
self.unicode_errors = PyBytes_AsString(self._berrors)
|
||||
else:
|
||||
self.unicode_errors = NULL
|
||||
|
||||
def __dealloc__(self):
|
||||
PyMem_Free(self.pk.buf)
|
||||
|
@ -206,17 +214,19 @@ cdef class Packer(object):
|
|||
if ret == 0:
|
||||
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
|
||||
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
|
||||
if not self.encoding:
|
||||
raise TypeError("Can't encode unicode string: no encoding is specified")
|
||||
#TODO: Use faster API for UTF-8
|
||||
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
|
||||
L = len(o)
|
||||
if L > ITEM_LIMIT:
|
||||
raise PackValueError("unicode string is too large")
|
||||
rawval = o
|
||||
ret = msgpack_pack_raw(&self.pk, L)
|
||||
if ret == 0:
|
||||
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
|
||||
if self.encoding == NULL:
|
||||
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
|
||||
if ret == -2:
|
||||
raise PackValueError("unicode string is too large")
|
||||
else:
|
||||
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
|
||||
L = len(o)
|
||||
if L > ITEM_LIMIT:
|
||||
raise PackValueError("unicode string is too large")
|
||||
ret = msgpack_pack_raw(&self.pk, L)
|
||||
if ret == 0:
|
||||
rawval = o
|
||||
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
|
||||
elif PyDict_CheckExact(o):
|
||||
d = <dict>o
|
||||
L = len(d)
|
||||
|
|
|
@ -67,6 +67,53 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_
|
|||
|
||||
#include "pack_template.h"
|
||||
|
||||
// return -2 when o is too long
|
||||
static inline int
|
||||
msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit)
|
||||
{
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
assert(PyUnicode_Check(o));
|
||||
|
||||
Py_ssize_t len;
|
||||
const char* buf = PyUnicode_AsUTF8AndSize(o, &len);
|
||||
if (buf == NULL)
|
||||
return -1;
|
||||
|
||||
if (len > limit) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
int ret = msgpack_pack_raw(pk, len);
|
||||
if (ret) return ret;
|
||||
|
||||
return msgpack_pack_raw_body(pk, buf, len);
|
||||
#else
|
||||
PyObject *bytes;
|
||||
Py_ssize_t len;
|
||||
int ret;
|
||||
|
||||
// py2
|
||||
bytes = PyUnicode_AsUTF8String(o);
|
||||
if (bytes == NULL)
|
||||
return -1;
|
||||
|
||||
len = PyString_GET_SIZE(bytes);
|
||||
if (len > limit) {
|
||||
Py_DECREF(bytes);
|
||||
return -2;
|
||||
}
|
||||
|
||||
ret = msgpack_pack_raw(pk, len);
|
||||
if (ret) {
|
||||
Py_DECREF(bytes);
|
||||
return -1;
|
||||
}
|
||||
ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len);
|
||||
Py_DECREF(bytes);
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue