Add Timestamp support (#382)

This commit is contained in:
Inada Naoki 2019-12-05 18:29:15 +09:00 committed by GitHub
parent 2c6668941f
commit 641406902e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 283 additions and 20 deletions

View file

@ -27,6 +27,10 @@ API reference
.. autoclass:: ExtType .. autoclass:: ExtType
.. autoclass:: Timestamp
:members:
:special-members: __init__
exceptions exceptions
---------- ----------

View file

@ -1,22 +1,10 @@
# coding: utf-8 # coding: utf-8
from ._version import version from ._version import version
from .exceptions import * from .exceptions import *
from .ext import ExtType, Timestamp
import os import os
import sys import sys
from collections import namedtuple
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
if os.environ.get('MSGPACK_PUREPYTHON') or sys.version_info[0] == 2: if os.environ.get('MSGPACK_PUREPYTHON') or sys.version_info[0] == 2:

View file

@ -4,8 +4,9 @@ from cpython cimport *
from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact
cdef ExtType cdef ExtType
cdef Timestamp
from . import ExtType from .ext import ExtType, Timestamp
cdef extern from "Python.h": cdef extern from "Python.h":
@ -36,6 +37,7 @@ cdef extern from "pack.h":
int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds);
int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
cdef extern from "buff_converter.h": cdef extern from "buff_converter.h":
@ -135,6 +137,7 @@ cdef class Packer(object):
cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
cdef long long llval cdef long long llval
cdef unsigned long long ullval cdef unsigned long long ullval
cdef unsigned long ulval
cdef long longval cdef long longval
cdef float fval cdef float fval
cdef double dval cdef double dval
@ -238,6 +241,10 @@ cdef class Packer(object):
raise ValueError("EXT data is too large") raise ValueError("EXT data is too large")
ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_ext(&self.pk, longval, L)
ret = msgpack_pack_raw_body(&self.pk, rawval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif type(o) is Timestamp:
llval = o.seconds
ulval = o.nanoseconds
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
L = Py_SIZE(o) L = Py_SIZE(o)
if L > ITEM_LIMIT: if L > ITEM_LIMIT:

View file

@ -19,7 +19,7 @@ from .exceptions import (
FormatError, FormatError,
StackError, StackError,
) )
from . import ExtType from .ext import ExtType, Timestamp
cdef extern from "unpack.h": cdef extern from "unpack.h":
@ -31,6 +31,7 @@ cdef extern from "unpack.h":
PyObject* object_hook PyObject* object_hook
PyObject* list_hook PyObject* list_hook
PyObject* ext_hook PyObject* ext_hook
PyObject* timestamp_t
char *unicode_errors char *unicode_errors
Py_ssize_t max_str_len Py_ssize_t max_str_len
Py_ssize_t max_bin_len Py_ssize_t max_bin_len
@ -98,6 +99,8 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("ext_hook must be a callable.") raise TypeError("ext_hook must be a callable.")
ctx.user.ext_hook = <PyObject*>ext_hook ctx.user.ext_hook = <PyObject*>ext_hook
# Add Timestamp type to the user object so it may be used in unpack.h
ctx.user.timestamp_t = <PyObject*>Timestamp
ctx.user.unicode_errors = unicode_errors ctx.user.unicode_errors = unicode_errors
def default_read_extended_type(typecode, data): def default_read_extended_type(typecode, data):

136
msgpack/ext.py Normal file
View file

@ -0,0 +1,136 @@
# coding: utf-8
from collections import namedtuple
import sys
import struct
PY2 = sys.version_info[0] == 2
if not PY2:
long = int
class ExtType(namedtuple("ExtType", "code data")):
    """A msgpack ext value: an integer type ``code`` plus a ``bytes`` payload.

    Constructing with ``code == -1`` does not produce an ``ExtType``; the
    payload is decoded with :func:`Timestamp.from_bytes` and the resulting
    ``Timestamp`` is returned instead.
    """

    def __new__(cls, code, data):
        """Validate the arguments and build the tuple.

        :raises TypeError: if ``code`` is not an int or ``data`` is not bytes.
        :raises ValueError: if ``code`` is neither -1 nor within 0..127.
        """
        # Type checks come first so the error kind matches the argument at fault.
        if not isinstance(code, int):
            raise TypeError("code must be int")
        if not isinstance(data, bytes):
            raise TypeError("data must be bytes")

        # -1 is handed straight to Timestamp rather than kept as a generic ext.
        if code == -1:
            return Timestamp.from_bytes(data)

        if code < 0 or code > 127:
            raise ValueError("code must be 0~127")
        return super(ExtType, cls).__new__(cls, code, data)
class Timestamp(object):
    """Timestamp represents the Timestamp extension type in msgpack.

    When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python
    msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`.
    """

    __slots__ = ["seconds", "nanoseconds"]

    def __init__(self, seconds, nanoseconds=0):
        """Initialize a Timestamp object.

        :param seconds: Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). May be
            negative. If :code:`seconds` includes a fractional part, :code:`nanoseconds` must be 0.
        :type seconds: int or float

        :param nanoseconds: Number of nanoseconds to add to `seconds` to get fractional time. Maximum is 999_999_999.
            Default is 0.
        :type nanoseconds: int

        :raises TypeError: if `seconds` is not numeric or `nanoseconds` is not an integer.
        :raises ValueError: if `nanoseconds` is outside 0..999999999, or is non-zero while `seconds` is a float.

        Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns.
        """
        if not isinstance(seconds, (int, long, float)):
            raise TypeError("seconds must be numeric")
        if not isinstance(nanoseconds, (int, long)):
            raise TypeError("nanoseconds must be an integer")

        if nanoseconds:
            if not 0 <= nanoseconds <= 999999999:
                raise ValueError("nanoseconds must be a non-negative integer not greater than 999999999.")
            if not isinstance(seconds, (int, long)):
                raise ValueError("seconds must be an integer if also providing nanoseconds.")
            whole_seconds = int(seconds)
        else:
            # Floor division keeps the fractional part non-negative for negative inputs.
            whole_seconds = int(seconds // 1)
            # round helps with floating point issues
            nanoseconds = int(round(seconds % 1 * 1e9, 0))
            if nanoseconds >= 1000000000:
                # The fraction rounded up to a full second (e.g. 0.9999999999); carry it so that
                # nanoseconds stays within its valid range.
                whole_seconds += 1
                nanoseconds -= 1000000000

        self.seconds = whole_seconds
        self.nanoseconds = nanoseconds

    def __repr__(self):
        """String representation of Timestamp."""
        return "Timestamp(seconds={0}, nanoseconds={1})".format(self.seconds, self.nanoseconds)

    def __eq__(self, other):
        """Check for equality with another Timestamp object"""
        if type(other) is self.__class__:
            return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
        return False

    def __ne__(self, other):
        """not-equals method (see :func:`__eq__()`)"""
        return not self.__eq__(other)

    def __hash__(self):
        """Hash consistent with :func:`__eq__`, so equal timestamps collide in sets and dicts.

        Instances are mutable; do not modify one while it is used as a dict key or set member.
        """
        return hash((self.seconds, self.nanoseconds))

    @staticmethod
    def from_bytes(b):
        """Unpack bytes into a `Timestamp` object.

        Used for pure-Python msgpack unpacking.

        :param b: Payload from msgpack ext message with code -1
        :type b: bytes

        :returns: Timestamp object unpacked from msgpack ext payload
        :rtype: Timestamp

        :raises ValueError: if `b` is not 4, 8 or 12 bytes long.
        """
        size = len(b)
        if size == 4:
            # timestamp 32: unsigned 32-bit seconds only
            seconds = struct.unpack("!L", b)[0]
            nanoseconds = 0
        elif size == 8:
            # timestamp 64: nanoseconds in the upper 30 bits, seconds in the lower 34 bits
            data64 = struct.unpack("!Q", b)[0]
            seconds = data64 & 0x00000003ffffffff
            nanoseconds = data64 >> 34
        elif size == 12:
            # timestamp 96: unsigned 32-bit nanoseconds followed by signed 64-bit seconds
            nanoseconds, seconds = struct.unpack("!Iq", b)
        else:
            raise ValueError("Timestamp type can only be created from 32, 64, or 96-bit byte objects")
        return Timestamp(seconds, nanoseconds)

    def to_bytes(self):
        """Pack this Timestamp object into bytes.

        Used for pure-Python msgpack packing. The shortest of the three
        encodings (4, 8 or 12 bytes) that can hold the value is chosen.

        :returns data: Payload for EXT message with code -1 (timestamp type)
        :rtype: bytes
        """
        if (self.seconds >> 34) == 0:  # seconds is non-negative and fits in 34 bits
            data64 = self.nanoseconds << 34 | self.seconds
            if (data64 & 0xffffffff00000000) == 0:
                # nanoseconds is zero and seconds < 2**32, so timestamp 32
                data = struct.pack("!L", data64)
            else:
                # timestamp 64
                data = struct.pack("!Q", data64)
        else:
            # timestamp 96
            data = struct.pack("!Iq", self.nanoseconds, self.seconds)
        return data

    def to_float_s(self):
        """Get the timestamp as a floating-point value.

        :returns: posix timestamp
        :rtype: float
        """
        return self.seconds + self.nanoseconds / 1e9

    def to_unix_ns(self):
        """Get the timestamp as a unixtime in nanoseconds.

        Computed with integer arithmetic only; going through a float would
        discard the low digits once the result exceeds 2**53.

        :returns: posix timestamp in nanoseconds
        :rtype: int
        """
        return self.seconds * 10 ** 9 + self.nanoseconds

View file

@ -66,7 +66,7 @@ from .exceptions import (
StackError, StackError,
) )
from . import ExtType from .ext import ExtType, Timestamp
EX_SKIP = 0 EX_SKIP = 0
@ -826,7 +826,11 @@ class Packer(object):
if self._use_float: if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xca, obj)) return self._buffer.write(struct.pack(">Bf", 0xca, obj))
return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
if check(obj, ExtType): if check(obj, (ExtType, Timestamp)):
if check(obj, Timestamp):
code = -1
data = obj.to_bytes()
else:
code = obj.code code = obj.code
data = obj.data data = obj.data
assert isinstance(code, int) assert isinstance(code, int)

View file

@ -759,6 +759,39 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l)
} }
/*
 * Pack Timestamp extension type. Follows msgpack-c pack_template.h.
 *
 * Writes an ext value with typecode -1 using the shortest of three layouts:
 *   - 4 bytes:  32-bit seconds, when nanoseconds is 0 and seconds fits in 32 bits
 *   - 8 bytes:  nanoseconds in the upper 30 bits, seconds in the lower 34 bits
 *   - 12 bytes: 32-bit nanoseconds followed by signed 64-bit seconds
 *
 * Returns 0 on success; otherwise the non-zero status reported by
 * msgpack_pack_ext() / msgpack_pack_raw_body() is passed through instead of
 * being discarded.
 */
static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds)
{
    int ret;

    if ((seconds >> 34) == 0) {
        /* seconds is unsigned and fits in 34 bits */
        uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds;
        if ((data64 & 0xffffffff00000000ULL) == 0) {
            /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */
            unsigned char buf[4];
            uint32_t data32 = (uint32_t)data64;
            _msgpack_store32(buf, data32);
            ret = msgpack_pack_ext(x, -1, 4);
            if (ret != 0) {
                return ret;
            }
            return msgpack_pack_raw_body(x, buf, 4);
        } else {
            /* timestamp64 */
            unsigned char buf[8];
            _msgpack_store64(buf, data64);
            ret = msgpack_pack_ext(x, -1, 8);
            if (ret != 0) {
                return ret;
            }
            return msgpack_pack_raw_body(x, buf, 8);
        }
    } else {
        /* seconds is signed or >34bits */
        unsigned char buf[12];
        _msgpack_store32(&buf[0], nanoseconds);
        _msgpack_store64(&buf[4], seconds);
        ret = msgpack_pack_ext(x, -1, 12);
        if (ret != 0) {
            return ret;
        }
        return msgpack_pack_raw_body(x, buf, 12);
    }
}
#undef msgpack_pack_append_buffer #undef msgpack_pack_append_buffer

View file

@ -27,6 +27,7 @@ typedef struct unpack_user {
PyObject *object_hook; PyObject *object_hook;
PyObject *list_hook; PyObject *list_hook;
PyObject *ext_hook; PyObject *ext_hook;
PyObject *timestamp_t;
const char *unicode_errors; const char *unicode_errors;
Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
} unpack_user; } unpack_user;
@ -259,6 +260,38 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0; return 0;
} }
/* Decoded timestamp value; unpack_timestamp() fills both fields from an ext payload. */
typedef struct msgpack_timestamp {
/* Seconds component; signed 64-bit, so the 12-byte layout can carry negative values. */
int64_t tv_sec;
/* Nanoseconds component; set to 0 for the 4-byte layout. */
uint32_t tv_nsec;
} msgpack_timestamp;
/*
 * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h.
 *
 * The payload length selects the layout:
 *   4  -> 32-bit seconds, nanoseconds = 0
 *   8  -> upper 30 bits nanoseconds, lower 34 bits seconds
 *   12 -> 32-bit nanoseconds followed by signed 64-bit seconds
 *
 * Returns 0 on success and -1 for any other length (ts is left untouched).
 */
static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) {
    if (buflen == 4) {
        uint32_t sec32 = _msgpack_load32(uint32_t, buf);
        ts->tv_nsec = 0;
        ts->tv_sec = (int64_t)sec32;
        return 0;
    }

    if (buflen == 8) {
        uint64_t packed = _msgpack_load64(uint64_t, buf);
        ts->tv_nsec = (uint32_t)(packed >> 34);
        ts->tv_sec = packed & 0x00000003ffffffffLL;
        return 0;
    }

    if (buflen == 12) {
        ts->tv_nsec = _msgpack_load32(uint32_t, buf);
        ts->tv_sec = _msgpack_load64(int64_t, buf + 4);
        return 0;
    }

    return -1;
}
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
unsigned int length, msgpack_unpack_object* o) unsigned int length, msgpack_unpack_object* o)
{ {
@ -273,7 +306,16 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch
return -1; return -1;
} }
// length also includes the typecode, so the actual data is length-1 // length also includes the typecode, so the actual data is length-1
if (typecode == -1) {
msgpack_timestamp ts;
if (unpack_timestamp(pos, length-1, &ts) == 0) {
py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec);
} else {
py = NULL;
}
} else {
py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1);
}
if (!py) if (!py)
return -1; return -1;
*o = py; *o = py;

46
test/test_timestamp.py Normal file
View file

@ -0,0 +1,46 @@
import msgpack
from msgpack import Timestamp
def test_timestamp():
    """Round-trip a Timestamp through packb/unpackb for every wire layout."""
    # (timestamp, expected payload, expected ext header, seconds, nanoseconds)
    cases = [
        # timestamp32
        (Timestamp(2**32 - 1), b"\xff\xff\xff\xff", b"\xd6\xff", 2**32 - 1, 0),
        # timestamp64
        (
            Timestamp(2**34 - 1, 999999999),
            b"\xee\x6b\x27\xff\xff\xff\xff\xff",
            b"\xd7\xff",
            2**34 - 1,
            999999999,
        ),
        # timestamp96
        (
            Timestamp(2**63 - 1, 999999999),
            b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff",
            b"\xc7\x0c\xff",
            2**63 - 1,
            999999999,
        ),
        # negative fractional -> s: -3, ns: 700000000
        (
            Timestamp(-2.3),
            b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd",
            b"\xc7\x0c\xff",
            -3,
            700000000,
        ),
    ]

    for ts, payload, header, want_seconds, want_nanoseconds in cases:
        assert ts.to_bytes() == payload
        packed = msgpack.packb(ts)
        assert packed == header + ts.to_bytes()
        unpacked = msgpack.unpackb(packed)
        assert ts == unpacked
        assert ts.seconds == want_seconds and ts.nanoseconds == want_nanoseconds
def test_timestamp_to():
    """Check the float-seconds and integer-nanoseconds conversions."""
    stamp = Timestamp(42, 14)
    as_float_seconds = stamp.to_float_s()
    as_nanoseconds = stamp.to_unix_ns()
    assert as_float_seconds == 42.000000014
    assert as_nanoseconds == 42000000014