Support datetime. (#394)

This commit is contained in:
Inada Naoki 2019-12-11 23:48:16 +09:00 committed by GitHub
parent 5fd6119093
commit 2186455d15
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 222 additions and 24 deletions

View file

@ -4,7 +4,7 @@ all: cython
.PHONY: black .PHONY: black
black: black:
black msgpack/ test/ black msgpack/ test/ setup.py
.PHONY: cython .PHONY: cython
cython: cython:

View file

@ -1,4 +1,11 @@
# coding: utf-8 # coding: utf-8
#cython: embedsignature=True, c_string_encoding=ascii, language_level=3 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3
from cpython.datetime cimport import_datetime, datetime_new
import_datetime()
import datetime
cdef object utc = datetime.timezone.utc
cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc)
include "_packer.pyx" include "_packer.pyx"
include "_unpacker.pyx" include "_unpacker.pyx"

View file

@ -2,6 +2,10 @@
from cpython cimport * from cpython cimport *
from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact
from cpython.datetime cimport (
PyDateTime_CheckExact, PyDelta_CheckExact,
datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds,
)
cdef ExtType cdef ExtType
cdef Timestamp cdef Timestamp
@ -99,8 +103,9 @@ cdef class Packer(object):
cdef object _berrors cdef object _berrors
cdef const char *unicode_errors cdef const char *unicode_errors
cdef bint strict_types cdef bint strict_types
cdef bool use_float cdef bint use_float
cdef bint autoreset cdef bint autoreset
cdef bint datetime
def __cinit__(self): def __cinit__(self):
cdef int buf_size = 1024*1024 cdef int buf_size = 1024*1024
@ -110,12 +115,13 @@ cdef class Packer(object):
self.pk.buf_size = buf_size self.pk.buf_size = buf_size
self.pk.length = 0 self.pk.length = 0
def __init__(self, *, default=None, unicode_errors=None, def __init__(self, *, default=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
bint strict_types=False): bint strict_types=False, bint datetime=False, unicode_errors=None):
self.use_float = use_single_float self.use_float = use_single_float
self.strict_types = strict_types self.strict_types = strict_types
self.autoreset = autoreset self.autoreset = autoreset
self.datetime = datetime
self.pk.use_bin_type = use_bin_type self.pk.use_bin_type = use_bin_type
if default is not None: if default is not None:
if not PyCallable_Check(default): if not PyCallable_Check(default):
@ -262,6 +268,13 @@ cdef class Packer(object):
if ret == 0: if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L) ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L)
PyBuffer_Release(&view); PyBuffer_Release(&view);
elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
delta = o - epoch
if not PyDelta_CheckExact(delta):
raise ValueError("failed to calculate delta")
llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta)
ulval = timedelta_microseconds(delta) * 1000
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
elif not default_used and self._default: elif not default_used and self._default:
o = self._default(o) o = self._default(o)
default_used = 1 default_used = 1

View file

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from cpython cimport * from cpython cimport *
cdef extern from "Python.h": cdef extern from "Python.h":
ctypedef struct PyObject ctypedef struct PyObject
cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1
@ -21,6 +20,8 @@ from .exceptions import (
) )
from .ext import ExtType, Timestamp from .ext import ExtType, Timestamp
cdef object giga = 1_000_000_000
cdef extern from "unpack.h": cdef extern from "unpack.h":
ctypedef struct msgpack_user: ctypedef struct msgpack_user:
@ -28,10 +29,13 @@ cdef extern from "unpack.h":
bint raw bint raw
bint has_pairs_hook # call object_hook with k-v pairs bint has_pairs_hook # call object_hook with k-v pairs
bint strict_map_key bint strict_map_key
int timestamp
PyObject* object_hook PyObject* object_hook
PyObject* list_hook PyObject* list_hook
PyObject* ext_hook PyObject* ext_hook
PyObject* timestamp_t PyObject* timestamp_t
PyObject *giga;
PyObject *utc;
char *unicode_errors char *unicode_errors
Py_ssize_t max_str_len Py_ssize_t max_str_len
Py_ssize_t max_bin_len Py_ssize_t max_bin_len
@ -57,7 +61,8 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx, cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook, object object_hook, object object_pairs_hook,
object list_hook, object ext_hook, object list_hook, object ext_hook,
bint use_list, bint raw, bint strict_map_key, bint use_list, bint raw, int timestamp,
bint strict_map_key,
const char* unicode_errors, const char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len,
@ -99,8 +104,14 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("ext_hook must be a callable.") raise TypeError("ext_hook must be a callable.")
ctx.user.ext_hook = <PyObject*>ext_hook ctx.user.ext_hook = <PyObject*>ext_hook
if timestamp < 0 or 3 < timestamp:
raise ValueError("timestamp must be 0..3")
# Add Timestamp type to the user object so it may be used in unpack.h # Add Timestamp type to the user object so it may be used in unpack.h
ctx.user.timestamp = timestamp
ctx.user.timestamp_t = <PyObject*>Timestamp ctx.user.timestamp_t = <PyObject*>Timestamp
ctx.user.giga = <PyObject*>giga
ctx.user.utc = <PyObject*>utc
ctx.user.unicode_errors = unicode_errors ctx.user.unicode_errors = unicode_errors
def default_read_extended_type(typecode, data): def default_read_extended_type(typecode, data):
@ -131,7 +142,7 @@ cdef inline int get_data_from_buffer(object obj,
def unpackb(object packed, *, object object_hook=None, object list_hook=None, def unpackb(object packed, *, object object_hook=None, object list_hook=None,
bint use_list=True, bint raw=False, bint strict_map_key=True, bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True,
unicode_errors=None, unicode_errors=None,
object_pairs_hook=None, ext_hook=ExtType, object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=-1, Py_ssize_t max_str_len=-1,
@ -179,7 +190,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None,
try: try:
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, raw, strict_map_key, cerr, use_list, raw, timestamp, strict_map_key, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off) ret = unpack_construct(&ctx, buf, buf_len, &off)
finally: finally:
@ -304,7 +315,7 @@ cdef class Unpacker(object):
self.buf = NULL self.buf = NULL
def __init__(self, file_like=None, *, Py_ssize_t read_size=0, def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
bint use_list=True, bint raw=False, bint strict_map_key=True, bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True,
object object_hook=None, object object_pairs_hook=None, object list_hook=None, object object_hook=None, object object_pairs_hook=None, object list_hook=None,
unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024,
object ext_hook=ExtType, object ext_hook=ExtType,
@ -359,7 +370,7 @@ cdef class Unpacker(object):
cerr = unicode_errors cerr = unicode_errors
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_hook, use_list, raw, strict_map_key, cerr, ext_hook, use_list, raw, timestamp, strict_map_key, cerr,
max_str_len, max_bin_len, max_array_len, max_str_len, max_bin_len, max_array_len,
max_map_len, max_ext_len) max_map_len, max_ext_len)

View file

@ -1,12 +1,18 @@
# coding: utf-8 # coding: utf-8
from collections import namedtuple from collections import namedtuple
import datetime
import sys import sys
import struct import struct
PY2 = sys.version_info[0] == 2 PY2 = sys.version_info[0] == 2
if not PY2: if not PY2:
long = int long = int
try:
_utc = datetime.timezone.utc
except AttributeError:
_utc = datetime.timezone(datetime.timedelta(0))
class ExtType(namedtuple("ExtType", "code data")): class ExtType(namedtuple("ExtType", "code data")):
@ -131,7 +137,7 @@ class Timestamp(object):
data = struct.pack("!Iq", self.nanoseconds, self.seconds) data = struct.pack("!Iq", self.nanoseconds, self.seconds)
return data return data
def to_float_s(self): def to_float(self):
"""Get the timestamp as a floating-point value. """Get the timestamp as a floating-point value.
:returns: posix timestamp :returns: posix timestamp
@ -139,6 +145,12 @@ class Timestamp(object):
""" """
return self.seconds + self.nanoseconds / 1e9 return self.seconds + self.nanoseconds / 1e9
@staticmethod
def from_float(unix_float):
seconds = int(unix_float)
nanoseconds = int((unix_float % 1) * 1000000000)
return Timestamp(seconds, nanoseconds)
def to_unix_ns(self): def to_unix_ns(self):
"""Get the timestamp as a unixtime in nanoseconds. """Get the timestamp as a unixtime in nanoseconds.
@ -146,3 +158,16 @@ class Timestamp(object):
:rtype: int :rtype: int
""" """
return int(self.seconds * 1e9 + self.nanoseconds) return int(self.seconds * 1e9 + self.nanoseconds)
if not PY2:
def to_datetime(self):
"""Get the timestamp as a UTC datetime.
:rtype: datetime.
"""
return datetime.datetime.fromtimestamp(self.to_float(), _utc)
@staticmethod
def from_datetime(dt):
return Timestamp.from_float(dt.timestamp())

View file

@ -1,5 +1,6 @@
"""Fallback pure Python implementation of msgpack""" """Fallback pure Python implementation of msgpack"""
from datetime import datetime as _DateTime
import sys import sys
import struct import struct
@ -174,6 +175,14 @@ class Unpacker(object):
If true, unpack msgpack raw to Python bytes. If true, unpack msgpack raw to Python bytes.
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param int timestamp:
Control how timestamp type is unpacked:
0 - Tiemstamp
1 - float (Seconds from the EPOCH)
2 - int (Nanoseconds from the EPOCH)
3 - datetime.datetime (UTC). Python 2 is not supported.
:param bool strict_map_key: :param bool strict_map_key:
If true (default), only str or bytes are accepted for map (dict) keys. If true (default), only str or bytes are accepted for map (dict) keys.
@ -248,6 +257,7 @@ class Unpacker(object):
read_size=0, read_size=0,
use_list=True, use_list=True,
raw=False, raw=False,
timestamp=0,
strict_map_key=True, strict_map_key=True,
object_hook=None, object_hook=None,
object_pairs_hook=None, object_pairs_hook=None,
@ -307,6 +317,9 @@ class Unpacker(object):
self._strict_map_key = bool(strict_map_key) self._strict_map_key = bool(strict_map_key)
self._unicode_errors = unicode_errors self._unicode_errors = unicode_errors
self._use_list = use_list self._use_list = use_list
if not (0 <= timestamp <= 3):
raise ValueError("timestamp must be 0..3")
self._timestamp = timestamp
self._list_hook = list_hook self._list_hook = list_hook
self._object_hook = object_hook self._object_hook = object_hook
self._object_pairs_hook = object_pairs_hook self._object_pairs_hook = object_pairs_hook
@ -672,10 +685,21 @@ class Unpacker(object):
else: else:
obj = obj.decode("utf_8", self._unicode_errors) obj = obj.decode("utf_8", self._unicode_errors)
return obj return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
if typ == TYPE_BIN: if typ == TYPE_BIN:
return bytes(obj) return bytes(obj)
if typ == TYPE_EXT:
if n == -1: # timestamp
ts = Timestamp.from_bytes(bytes(obj))
if self._timestamp == 1:
return ts.to_float()
elif self._timestamp == 2:
return ts.to_unix_ns()
elif self._timestamp == 3:
return ts.to_datetime()
else:
return ts
else:
return self._ext_hook(n, bytes(obj))
assert typ == TYPE_IMMEDIATE assert typ == TYPE_IMMEDIATE
return obj return obj
@ -756,6 +780,12 @@ class Packer(object):
This is useful when trying to implement accurate serialization This is useful when trying to implement accurate serialization
for python types. for python types.
:param bool datetime:
If set to true, datetime with tzinfo is packed into Timestamp type.
Note that the tzinfo is stripped in the timestamp.
You can get UTC datetime with `timestamp=3` option of the Unapcker.
(Python 2 is not supported).
:param str unicode_errors: :param str unicode_errors:
The error handler for encoding unicode. (default: 'strict') The error handler for encoding unicode. (default: 'strict')
DO NOT USE THIS!! This option is kept for very specific usage. DO NOT USE THIS!! This option is kept for very specific usage.
@ -764,18 +794,22 @@ class Packer(object):
def __init__( def __init__(
self, self,
default=None, default=None,
unicode_errors=None,
use_single_float=False, use_single_float=False,
autoreset=True, autoreset=True,
use_bin_type=True, use_bin_type=True,
strict_types=False, strict_types=False,
datetime=False,
unicode_errors=None,
): ):
self._strict_types = strict_types self._strict_types = strict_types
self._use_float = use_single_float self._use_float = use_single_float
self._autoreset = autoreset self._autoreset = autoreset
self._use_bin_type = use_bin_type self._use_bin_type = use_bin_type
self._unicode_errors = unicode_errors or "strict"
self._buffer = StringIO() self._buffer = StringIO()
if PY2 and datetime:
raise ValueError("datetime is not supported in Python 2")
self._datetime = bool(datetime)
self._unicode_errors = unicode_errors or "strict"
if default is not None: if default is not None:
if not callable(default): if not callable(default):
raise TypeError("default must be callable") raise TypeError("default must be callable")
@ -891,6 +925,12 @@ class Packer(object):
return self._pack_map_pairs( return self._pack_map_pairs(
len(obj), dict_iteritems(obj), nest_limit - 1 len(obj), dict_iteritems(obj), nest_limit - 1
) )
if self._datetime and check(obj, _DateTime):
obj = Timestamp.from_datetime(obj)
default_used = 1
continue
if not default_used and self._default is not None: if not default_used and self._default is not None:
obj = self._default(obj) obj = self._default(obj)
default_used = 1 default_used = 1

View file

@ -24,10 +24,13 @@ typedef struct unpack_user {
bool raw; bool raw;
bool has_pairs_hook; bool has_pairs_hook;
bool strict_map_key; bool strict_map_key;
int timestamp;
PyObject *object_hook; PyObject *object_hook;
PyObject *list_hook; PyObject *list_hook;
PyObject *ext_hook; PyObject *ext_hook;
PyObject *timestamp_t; PyObject *timestamp_t;
PyObject *giga;
PyObject *utc;
const char *unicode_errors; const char *unicode_errors;
Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
} unpack_user; } unpack_user;
@ -268,7 +271,7 @@ typedef struct msgpack_timestamp {
/* /*
* Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h.
*/ */
static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) {
switch (buflen) { switch (buflen) {
case 4: case 4:
ts->tv_nsec = 0; ts->tv_nsec = 0;
@ -292,10 +295,11 @@ static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack
} }
} }
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, #include "datetime.h"
static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
unsigned int length, msgpack_unpack_object* o) unsigned int length, msgpack_unpack_object* o)
{ {
PyObject *py;
int8_t typecode = (int8_t)*pos++; int8_t typecode = (int8_t)*pos++;
if (!u->ext_hook) { if (!u->ext_hook) {
PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL");
@ -305,13 +309,67 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch
PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len);
return -1; return -1;
} }
PyObject *py = NULL;
// length also includes the typecode, so the actual data is length-1 // length also includes the typecode, so the actual data is length-1
if (typecode == -1) { if (typecode == -1) {
msgpack_timestamp ts; msgpack_timestamp ts;
if (unpack_timestamp(pos, length-1, &ts) == 0) { if (unpack_timestamp(pos, length-1, &ts) < 0) {
return -1;
}
if (u->timestamp == 2) { // int
PyObject *a = PyLong_FromLongLong(ts.tv_sec);
if (a == NULL) return -1;
PyObject *c = PyNumber_Multiply(a, u->giga);
Py_DECREF(a);
if (c == NULL) {
return -1;
}
PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec);
if (b == NULL) {
Py_DECREF(c);
return -1;
}
py = PyNumber_Add(c, b);
Py_DECREF(c);
Py_DECREF(b);
}
else if (u->timestamp == 0) { // Timestamp
py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec);
} else { }
py = NULL; else { // float or datetime
PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec);
if (a == NULL) return -1;
PyObject *b = PyNumber_TrueDivide(a, u->giga);
Py_DECREF(a);
if (b == NULL) return -1;
PyObject *c = PyLong_FromLongLong(ts.tv_sec);
if (c == NULL) {
Py_DECREF(b);
return -1;
}
a = PyNumber_Add(b, c);
Py_DECREF(b);
Py_DECREF(c);
if (u->timestamp == 3) { // datetime
PyObject *t = PyTuple_Pack(2, a, u->utc);
Py_DECREF(a);
if (t == NULL) {
return -1;
}
py = PyDateTime_FromTimestamp(t);
Py_DECREF(t);
} else { // float
py = a;
}
} }
} else { } else {
py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1);

View file

@ -1,5 +1,11 @@
import pytest
import sys
import datetime
import msgpack import msgpack
from msgpack import Timestamp from msgpack.ext import Timestamp
if sys.version_info[0] > 2:
from msgpack.ext import _utc
def test_timestamp(): def test_timestamp():
@ -42,5 +48,43 @@ def test_timestamp():
def test_timestamp_to(): def test_timestamp_to():
t = Timestamp(42, 14) t = Timestamp(42, 14)
assert t.to_float_s() == 42.000000014 assert t.to_float() == 42.000000014
assert t.to_unix_ns() == 42000000014 assert t.to_unix_ns() == 42000000014
@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
def test_timestamp_datetime():
t = Timestamp(42, 14)
assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc)
@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
def test_unpack_datetime():
t = Timestamp(42, 14)
packed = msgpack.packb(t)
unpacked = msgpack.unpackb(packed, timestamp=3)
assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc)
@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
def test_pack_datetime():
t = Timestamp(42, 14000)
dt = t.to_datetime()
assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc)
packed = msgpack.packb(dt, datetime=True)
packed2 = msgpack.packb(t)
assert packed == packed2
unpacked = msgpack.unpackb(packed)
print(packed, unpacked)
assert unpacked == t
unpacked = msgpack.unpackb(packed, timestamp=3)
assert unpacked == dt
x = []
packed = msgpack.packb(dt, datetime=False, default=x.append)
assert x
assert x[0] == dt
assert msgpack.unpackb(packed) is None