Support datetime. (#394)

This commit is contained in:
Inada Naoki 2019-12-11 23:48:16 +09:00 committed by GitHub
parent 5fd6119093
commit 2186455d15
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 222 additions and 24 deletions

View file

@ -4,7 +4,7 @@ all: cython
.PHONY: black
black:
black msgpack/ test/
black msgpack/ test/ setup.py
.PHONY: cython
cython:

View file

@ -1,4 +1,11 @@
# coding: utf-8
#cython: embedsignature=True, c_string_encoding=ascii, language_level=3
from cpython.datetime cimport import_datetime, datetime_new
import_datetime()
import datetime
cdef object utc = datetime.timezone.utc
cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc)
include "_packer.pyx"
include "_unpacker.pyx"

View file

@ -2,6 +2,10 @@
from cpython cimport *
from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact
from cpython.datetime cimport (
PyDateTime_CheckExact, PyDelta_CheckExact,
datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds,
)
cdef ExtType
cdef Timestamp
@ -99,8 +103,9 @@ cdef class Packer(object):
cdef object _berrors
cdef const char *unicode_errors
cdef bint strict_types
cdef bool use_float
cdef bint use_float
cdef bint autoreset
cdef bint datetime
def __cinit__(self):
cdef int buf_size = 1024*1024
@ -110,12 +115,13 @@ cdef class Packer(object):
self.pk.buf_size = buf_size
self.pk.length = 0
def __init__(self, *, default=None, unicode_errors=None,
def __init__(self, *, default=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
bint strict_types=False):
bint strict_types=False, bint datetime=False, unicode_errors=None):
self.use_float = use_single_float
self.strict_types = strict_types
self.autoreset = autoreset
self.datetime = datetime
self.pk.use_bin_type = use_bin_type
if default is not None:
if not PyCallable_Check(default):
@ -262,6 +268,13 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L)
PyBuffer_Release(&view);
elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
delta = o - epoch
if not PyDelta_CheckExact(delta):
raise ValueError("failed to calculate delta")
llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta)
ulval = timedelta_microseconds(delta) * 1000
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
elif not default_used and self._default:
o = self._default(o)
default_used = 1

View file

@ -1,7 +1,6 @@
# coding: utf-8
from cpython cimport *
cdef extern from "Python.h":
ctypedef struct PyObject
cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1
@ -21,6 +20,8 @@ from .exceptions import (
)
from .ext import ExtType, Timestamp
cdef object giga = 1_000_000_000
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
@ -28,10 +29,13 @@ cdef extern from "unpack.h":
bint raw
bint has_pairs_hook # call object_hook with k-v pairs
bint strict_map_key
int timestamp
PyObject* object_hook
PyObject* list_hook
PyObject* ext_hook
PyObject* timestamp_t
PyObject *giga;
PyObject *utc;
char *unicode_errors
Py_ssize_t max_str_len
Py_ssize_t max_bin_len
@ -57,7 +61,8 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook,
object list_hook, object ext_hook,
bint use_list, bint raw, bint strict_map_key,
bint use_list, bint raw, int timestamp,
bint strict_map_key,
const char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
@ -99,8 +104,14 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("ext_hook must be a callable.")
ctx.user.ext_hook = <PyObject*>ext_hook
if timestamp < 0 or 3 < timestamp:
raise ValueError("timestamp must be 0..3")
# Add Timestamp type to the user object so it may be used in unpack.h
ctx.user.timestamp = timestamp
ctx.user.timestamp_t = <PyObject*>Timestamp
ctx.user.giga = <PyObject*>giga
ctx.user.utc = <PyObject*>utc
ctx.user.unicode_errors = unicode_errors
def default_read_extended_type(typecode, data):
@ -131,7 +142,7 @@ cdef inline int get_data_from_buffer(object obj,
def unpackb(object packed, *, object object_hook=None, object list_hook=None,
bint use_list=True, bint raw=False, bint strict_map_key=True,
bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True,
unicode_errors=None,
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=-1,
@ -179,7 +190,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None,
try:
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, raw, strict_map_key, cerr,
use_list, raw, timestamp, strict_map_key, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
@ -304,7 +315,7 @@ cdef class Unpacker(object):
self.buf = NULL
def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
bint use_list=True, bint raw=False, bint strict_map_key=True,
bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024,
object ext_hook=ExtType,
@ -359,7 +370,7 @@ cdef class Unpacker(object):
cerr = unicode_errors
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_hook, use_list, raw, strict_map_key, cerr,
ext_hook, use_list, raw, timestamp, strict_map_key, cerr,
max_str_len, max_bin_len, max_array_len,
max_map_len, max_ext_len)

View file

@ -1,12 +1,18 @@
# coding: utf-8
from collections import namedtuple
import datetime
import sys
import struct
PY2 = sys.version_info[0] == 2
if not PY2:
long = int
try:
_utc = datetime.timezone.utc
except AttributeError:
_utc = datetime.timezone(datetime.timedelta(0))
class ExtType(namedtuple("ExtType", "code data")):
@ -131,7 +137,7 @@ class Timestamp(object):
data = struct.pack("!Iq", self.nanoseconds, self.seconds)
return data
def to_float_s(self):
def to_float(self):
"""Get the timestamp as a floating-point value.
:returns: posix timestamp
@ -139,6 +145,12 @@ class Timestamp(object):
"""
return self.seconds + self.nanoseconds / 1e9
@staticmethod
def from_float(unix_float):
seconds = int(unix_float)
nanoseconds = int((unix_float % 1) * 1000000000)
return Timestamp(seconds, nanoseconds)
def to_unix_ns(self):
"""Get the timestamp as a unixtime in nanoseconds.
@ -146,3 +158,16 @@ class Timestamp(object):
:rtype: int
"""
return int(self.seconds * 1e9 + self.nanoseconds)
if not PY2:
def to_datetime(self):
"""Get the timestamp as a UTC datetime.
:rtype: datetime.
"""
return datetime.datetime.fromtimestamp(self.to_float(), _utc)
@staticmethod
def from_datetime(dt):
return Timestamp.from_float(dt.timestamp())

View file

@ -1,5 +1,6 @@
"""Fallback pure Python implementation of msgpack"""
from datetime import datetime as _DateTime
import sys
import struct
@ -174,6 +175,14 @@ class Unpacker(object):
If true, unpack msgpack raw to Python bytes.
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param int timestamp:
Control how timestamp type is unpacked:
0 - Tiemstamp
1 - float (Seconds from the EPOCH)
2 - int (Nanoseconds from the EPOCH)
3 - datetime.datetime (UTC). Python 2 is not supported.
:param bool strict_map_key:
If true (default), only str or bytes are accepted for map (dict) keys.
@ -248,6 +257,7 @@ class Unpacker(object):
read_size=0,
use_list=True,
raw=False,
timestamp=0,
strict_map_key=True,
object_hook=None,
object_pairs_hook=None,
@ -307,6 +317,9 @@ class Unpacker(object):
self._strict_map_key = bool(strict_map_key)
self._unicode_errors = unicode_errors
self._use_list = use_list
if not (0 <= timestamp <= 3):
raise ValueError("timestamp must be 0..3")
self._timestamp = timestamp
self._list_hook = list_hook
self._object_hook = object_hook
self._object_pairs_hook = object_pairs_hook
@ -672,10 +685,21 @@ class Unpacker(object):
else:
obj = obj.decode("utf_8", self._unicode_errors)
return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
if typ == TYPE_BIN:
return bytes(obj)
if typ == TYPE_EXT:
if n == -1: # timestamp
ts = Timestamp.from_bytes(bytes(obj))
if self._timestamp == 1:
return ts.to_float()
elif self._timestamp == 2:
return ts.to_unix_ns()
elif self._timestamp == 3:
return ts.to_datetime()
else:
return ts
else:
return self._ext_hook(n, bytes(obj))
assert typ == TYPE_IMMEDIATE
return obj
@ -756,6 +780,12 @@ class Packer(object):
This is useful when trying to implement accurate serialization
for python types.
:param bool datetime:
If set to true, datetime with tzinfo is packed into Timestamp type.
Note that the tzinfo is stripped in the timestamp.
You can get UTC datetime with `timestamp=3` option of the Unapcker.
(Python 2 is not supported).
:param str unicode_errors:
The error handler for encoding unicode. (default: 'strict')
DO NOT USE THIS!! This option is kept for very specific usage.
@ -764,18 +794,22 @@ class Packer(object):
def __init__(
self,
default=None,
unicode_errors=None,
use_single_float=False,
autoreset=True,
use_bin_type=True,
strict_types=False,
datetime=False,
unicode_errors=None,
):
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._unicode_errors = unicode_errors or "strict"
self._buffer = StringIO()
if PY2 and datetime:
raise ValueError("datetime is not supported in Python 2")
self._datetime = bool(datetime)
self._unicode_errors = unicode_errors or "strict"
if default is not None:
if not callable(default):
raise TypeError("default must be callable")
@ -891,6 +925,12 @@ class Packer(object):
return self._pack_map_pairs(
len(obj), dict_iteritems(obj), nest_limit - 1
)
if self._datetime and check(obj, _DateTime):
obj = Timestamp.from_datetime(obj)
default_used = 1
continue
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = 1

View file

@ -24,10 +24,13 @@ typedef struct unpack_user {
bool raw;
bool has_pairs_hook;
bool strict_map_key;
int timestamp;
PyObject *object_hook;
PyObject *list_hook;
PyObject *ext_hook;
PyObject *timestamp_t;
PyObject *giga;
PyObject *utc;
const char *unicode_errors;
Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
} unpack_user;
@ -268,7 +271,7 @@ typedef struct msgpack_timestamp {
/*
* Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h.
*/
static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) {
static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) {
switch (buflen) {
case 4:
ts->tv_nsec = 0;
@ -292,10 +295,11 @@ static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack
}
}
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
#include "datetime.h"
static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
unsigned int length, msgpack_unpack_object* o)
{
PyObject *py;
int8_t typecode = (int8_t)*pos++;
if (!u->ext_hook) {
PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL");
@ -305,13 +309,67 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch
PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len);
return -1;
}
PyObject *py = NULL;
// length also includes the typecode, so the actual data is length-1
if (typecode == -1) {
msgpack_timestamp ts;
if (unpack_timestamp(pos, length-1, &ts) == 0) {
if (unpack_timestamp(pos, length-1, &ts) < 0) {
return -1;
}
if (u->timestamp == 2) { // int
PyObject *a = PyLong_FromLongLong(ts.tv_sec);
if (a == NULL) return -1;
PyObject *c = PyNumber_Multiply(a, u->giga);
Py_DECREF(a);
if (c == NULL) {
return -1;
}
PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec);
if (b == NULL) {
Py_DECREF(c);
return -1;
}
py = PyNumber_Add(c, b);
Py_DECREF(c);
Py_DECREF(b);
}
else if (u->timestamp == 0) { // Timestamp
py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec);
} else {
py = NULL;
}
else { // float or datetime
PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec);
if (a == NULL) return -1;
PyObject *b = PyNumber_TrueDivide(a, u->giga);
Py_DECREF(a);
if (b == NULL) return -1;
PyObject *c = PyLong_FromLongLong(ts.tv_sec);
if (c == NULL) {
Py_DECREF(b);
return -1;
}
a = PyNumber_Add(b, c);
Py_DECREF(b);
Py_DECREF(c);
if (u->timestamp == 3) { // datetime
PyObject *t = PyTuple_Pack(2, a, u->utc);
Py_DECREF(a);
if (t == NULL) {
return -1;
}
py = PyDateTime_FromTimestamp(t);
Py_DECREF(t);
} else { // float
py = a;
}
}
} else {
py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1);

View file

@ -1,5 +1,11 @@
import pytest
import sys
import datetime
import msgpack
from msgpack import Timestamp
from msgpack.ext import Timestamp
if sys.version_info[0] > 2:
from msgpack.ext import _utc
def test_timestamp():
@ -42,5 +48,43 @@ def test_timestamp():
def test_timestamp_to():
t = Timestamp(42, 14)
assert t.to_float_s() == 42.000000014
assert t.to_float() == 42.000000014
assert t.to_unix_ns() == 42000000014
@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
def test_timestamp_datetime():
t = Timestamp(42, 14)
assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc)
@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
def test_unpack_datetime():
t = Timestamp(42, 14)
packed = msgpack.packb(t)
unpacked = msgpack.unpackb(packed, timestamp=3)
assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc)
@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
def test_pack_datetime():
t = Timestamp(42, 14000)
dt = t.to_datetime()
assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc)
packed = msgpack.packb(dt, datetime=True)
packed2 = msgpack.packb(t)
assert packed == packed2
unpacked = msgpack.unpackb(packed)
print(packed, unpacked)
assert unpacked == t
unpacked = msgpack.unpackb(packed, timestamp=3)
assert unpacked == dt
x = []
packed = msgpack.packb(dt, datetime=False, default=x.append)
assert x
assert x[0] == dt
assert msgpack.unpackb(packed) is None