Add Timestamp support (#382)

This commit is contained in:
Inada Naoki 2019-12-05 18:29:15 +09:00 committed by GitHub
parent 2c6668941f
commit 641406902e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 283 additions and 20 deletions

View file

@ -27,6 +27,10 @@ API reference
.. autoclass:: ExtType
.. autoclass:: Timestamp
:members:
:special-members: __init__
exceptions
----------

View file

@ -1,22 +1,10 @@
# coding: utf-8
from ._version import version
from .exceptions import *
from .ext import ExtType, Timestamp
import os
import sys
from collections import namedtuple
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
if os.environ.get('MSGPACK_PUREPYTHON') or sys.version_info[0] == 2:

View file

@ -4,8 +4,9 @@ from cpython cimport *
from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact
cdef ExtType
cdef Timestamp
from . import ExtType
from .ext import ExtType, Timestamp
cdef extern from "Python.h":
@ -36,6 +37,7 @@ cdef extern from "pack.h":
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds);
int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
cdef extern from "buff_converter.h":
@ -135,6 +137,7 @@ cdef class Packer(object):
cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
cdef long long llval
cdef unsigned long long ullval
cdef unsigned long ulval
cdef long longval
cdef float fval
cdef double dval
@ -238,6 +241,10 @@ cdef class Packer(object):
raise ValueError("EXT data is too large")
ret = msgpack_pack_ext(&self.pk, longval, L)
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif type(o) is Timestamp:
llval = o.seconds
ulval = o.nanoseconds
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
L = Py_SIZE(o)
if L > ITEM_LIMIT:

View file

@ -19,7 +19,7 @@ from .exceptions import (
FormatError,
StackError,
)
from . import ExtType
from .ext import ExtType, Timestamp
cdef extern from "unpack.h":
@ -31,6 +31,7 @@ cdef extern from "unpack.h":
PyObject* object_hook
PyObject* list_hook
PyObject* ext_hook
PyObject* timestamp_t
char *unicode_errors
Py_ssize_t max_str_len
Py_ssize_t max_bin_len
@ -98,6 +99,8 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("ext_hook must be a callable.")
ctx.user.ext_hook = <PyObject*>ext_hook
# Add Timestamp type to the user object so it may be used in unpack.h
ctx.user.timestamp_t = <PyObject*>Timestamp
ctx.user.unicode_errors = unicode_errors
def default_read_extended_type(typecode, data):

136
msgpack/ext.py Normal file
View file

@ -0,0 +1,136 @@
# coding: utf-8
from collections import namedtuple
import sys
import struct
PY2 = sys.version_info[0] == 2
if not PY2:
long = int
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if code == -1:
return Timestamp.from_bytes(data)
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
class Timestamp(object):
"""Timestamp represents the Timestamp extension type in msgpack.
When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python
msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`.
"""
__slots__ = ["seconds", "nanoseconds"]
def __init__(self, seconds, nanoseconds=0):
"""Initialize a Timestamp object.
:param seconds: Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). May be
negative. If :code:`seconds` includes a fractional part, :code:`nanoseconds` must be 0.
:type seconds: int or float
:param nanoseconds: Number of nanoseconds to add to `seconds` to get fractional time. Maximum is 999_999_999.
Default is 0.
:type nanoseconds: int
Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns.
"""
if not isinstance(seconds, (int, long, float)):
raise TypeError("seconds must be numeric")
if not isinstance(nanoseconds, (int, long)):
raise TypeError("nanoseconds must be an integer")
if nanoseconds:
if nanoseconds < 0 or nanoseconds % 1 != 0 or nanoseconds > (1e9 - 1):
raise ValueError("nanoseconds must be a non-negative integer less than 999999999.")
if not isinstance(seconds, (int, long)):
raise ValueError("seconds must be an integer if also providing nanoseconds.")
self.nanoseconds = nanoseconds
else:
# round helps with floating point issues
self.nanoseconds = int(round(seconds % 1 * 1e9, 0))
self.seconds = int(seconds // 1)
def __repr__(self):
"""String representation of Timestamp."""
return "Timestamp(seconds={0}, nanoseconds={1})".format(self.seconds, self.nanoseconds)
def __eq__(self, other):
"""Check for equality with another Timestamp object"""
if type(other) is self.__class__:
return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
return False
def __ne__(self, other):
"""not-equals method (see :func:`__eq__()`)"""
return not self.__eq__(other)
@staticmethod
def from_bytes(b):
"""Unpack bytes into a `Timestamp` object.
Used for pure-Python msgpack unpacking.
:param b: Payload from msgpack ext message with code -1
:type b: bytes
:returns: Timestamp object unpacked from msgpack ext payload
:rtype: Timestamp
"""
if len(b) == 4:
seconds = struct.unpack("!L", b)[0]
nanoseconds = 0
elif len(b) == 8:
data64 = struct.unpack("!Q", b)[0]
seconds = data64 & 0x00000003ffffffff
nanoseconds = data64 >> 34
elif len(b) == 12:
nanoseconds, seconds = struct.unpack("!Iq", b)
else:
raise ValueError("Timestamp type can only be created from 32, 64, or 96-bit byte objects")
return Timestamp(seconds, nanoseconds)
def to_bytes(self):
"""Pack this Timestamp object into bytes.
Used for pure-Python msgpack packing.
:returns data: Payload for EXT message with code -1 (timestamp type)
:rtype: bytes
"""
if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits
data64 = self.nanoseconds << 34 | self.seconds
if data64 & 0xffffffff00000000 == 0:
# nanoseconds is zero and seconds < 2**32, so timestamp 32
data = struct.pack("!L", data64)
else:
# timestamp 64
data = struct.pack("!Q", data64)
else:
# timestamp 96
data = struct.pack("!Iq", self.nanoseconds, self.seconds)
return data
def to_float_s(self):
"""Get the timestamp as a floating-point value.
:returns: posix timestamp
:rtype: float
"""
return self.seconds + self.nanoseconds/1e9
def to_unix_ns(self):
"""Get the timestamp as a unixtime in nanoseconds.
:returns: posix timestamp in nanoseconds
:rtype: int
"""
return int(self.seconds * 1e9 + self.nanoseconds)

View file

@ -66,7 +66,7 @@ from .exceptions import (
StackError,
)
from . import ExtType
from .ext import ExtType, Timestamp
EX_SKIP = 0
@ -826,9 +826,13 @@ class Packer(object):
if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xca, obj))
return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
if check(obj, ExtType):
code = obj.code
data = obj.data
if check(obj, (ExtType, Timestamp)):
if check(obj, Timestamp):
code = -1
data = obj.to_bytes()
else:
code = obj.code
data = obj.data
assert isinstance(code, int)
assert isinstance(data, bytes)
L = len(data)

View file

@ -759,6 +759,39 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l)
}
/*
* Pack Timestamp extension type. Follows msgpack-c pack_template.h.
*/
static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds)
{
if ((seconds >> 34) == 0) {
/* seconds is unsigned and fits in 34 bits */
uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds;
if ((data64 & 0xffffffff00000000L) == 0) {
/* no nanoseconds and seconds is 32bits or smaller. timestamp32. */
unsigned char buf[4];
uint32_t data32 = (uint32_t)data64;
msgpack_pack_ext(x, -1, 4);
_msgpack_store32(buf, data32);
msgpack_pack_raw_body(x, buf, 4);
} else {
/* timestamp64 */
unsigned char buf[8];
msgpack_pack_ext(x, -1, 8);
_msgpack_store64(buf, data64);
msgpack_pack_raw_body(x, buf, 8);
}
} else {
/* seconds is signed or >34bits */
unsigned char buf[12];
_msgpack_store32(&buf[0], nanoseconds);
_msgpack_store64(&buf[4], seconds);
msgpack_pack_ext(x, -1, 12);
msgpack_pack_raw_body(x, buf, 12);
}
return 0;
}
#undef msgpack_pack_append_buffer

View file

@ -27,6 +27,7 @@ typedef struct unpack_user {
PyObject *object_hook;
PyObject *list_hook;
PyObject *ext_hook;
PyObject *timestamp_t;
const char *unicode_errors;
Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
} unpack_user;
@ -259,6 +260,38 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0;
}
typedef struct msgpack_timestamp {
int64_t tv_sec;
uint32_t tv_nsec;
} msgpack_timestamp;
/*
* Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h.
*/
static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) {
switch (buflen) {
case 4:
ts->tv_nsec = 0;
{
uint32_t v = _msgpack_load32(uint32_t, buf);
ts->tv_sec = (int64_t)v;
}
return 0;
case 8: {
uint64_t value =_msgpack_load64(uint64_t, buf);
ts->tv_nsec = (uint32_t)(value >> 34);
ts->tv_sec = value & 0x00000003ffffffffLL;
return 0;
}
case 12:
ts->tv_nsec = _msgpack_load32(uint32_t, buf);
ts->tv_sec = _msgpack_load64(int64_t, buf + 4);
return 0;
default:
return -1;
}
}
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
unsigned int length, msgpack_unpack_object* o)
{
@ -273,7 +306,16 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch
return -1;
}
// length also includes the typecode, so the actual data is length-1
py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1);
if (typecode == -1) {
msgpack_timestamp ts;
if (unpack_timestamp(pos, length-1, &ts) == 0) {
py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec);
} else {
py = NULL;
}
} else {
py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1);
}
if (!py)
return -1;
*o = py;

46
test/test_timestamp.py Normal file
View file

@ -0,0 +1,46 @@
import msgpack
from msgpack import Timestamp
def test_timestamp():
# timestamp32
ts = Timestamp(2**32 - 1)
assert ts.to_bytes() == b"\xff\xff\xff\xff"
packed = msgpack.packb(ts)
assert packed == b"\xd6\xff" + ts.to_bytes()
unpacked = msgpack.unpackb(packed)
assert ts == unpacked
assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0
# timestamp64
ts = Timestamp(2**34 - 1, 999999999)
assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff"
packed = msgpack.packb(ts)
assert packed == b"\xd7\xff" + ts.to_bytes()
unpacked = msgpack.unpackb(packed)
assert ts == unpacked
assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999
# timestamp96
ts = Timestamp(2**63 - 1, 999999999)
assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff"
packed = msgpack.packb(ts)
assert packed == b"\xc7\x0c\xff" + ts.to_bytes()
unpacked = msgpack.unpackb(packed)
assert ts == unpacked
assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999
# negative fractional
ts = Timestamp(-2.3) #s: -3, ns: 700000000
assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd"
packed = msgpack.packb(ts)
assert packed == b"\xc7\x0c\xff" + ts.to_bytes()
unpacked = msgpack.unpackb(packed)
assert ts == unpacked
assert ts.seconds == -3 and ts.nanoseconds == 700000000
def test_timestamp_to():
t = Timestamp(42, 14)
assert t.to_float_s() == 42.000000014
assert t.to_unix_ns() == 42000000014