diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 7e69a8044a305..7e4bd302a4f0a 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -172,6 +172,8 @@ Other enhancements - ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations ` (:issue:`6511`, :issue:`8505`) +- ``msgpack`` submodule has been updated to 0.4.6 with backward compatibility (:issue:`10581`) + .. ipython :: python s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D']) @@ -669,4 +671,5 @@ Bug Fixes - Bug in ``Series([np.nan]).astype('M8[ms]')``, which now returns ``Series([pd.NaT])`` (:issue:`10747`) - Bug in ``PeriodIndex.order`` reset freq (:issue:`10295`) - Bug in ``iloc`` allowing memory outside bounds of a Series to be accessed with negative integers (:issue:`10779`) +- Bug in ``read_msgpack`` where encoding is not respected (:issue:`10580`) - Bug preventing access to the first index when using ``iloc`` with a list containing the appropriate negative integer (:issue:`10547`, :issue:`10779`) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 6472da58ac711..d5c02736a1cf5 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -60,7 +60,7 @@ from pandas.core.internals import BlockManager, make_block import pandas.core.internals as internals -from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer +from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType # until we can pass this into our conversion functions, # this is pretty hacky @@ -131,7 +131,7 @@ def read_msgpack(path_or_buf, iterator=False, **kwargs): return Iterator(path_or_buf) def read(fh): - l = list(unpack(fh)) + l = list(unpack(fh, **kwargs)) if len(l) == 1: return l[0] return l @@ -222,7 +222,7 @@ def convert(values): # convert to a bytes array v = v.tostring() import zlib - return zlib.compress(v) + return ExtType(0, zlib.compress(v)) elif compressor == 'blosc': @@ -233,18 +233,24 @@ def convert(values): # convert to a bytes array v = v.tostring() import blosc - return blosc.compress(v, typesize=dtype.itemsize) + return ExtType(0, blosc.compress(v, typesize=dtype.itemsize)) # ndarray (on original dtype) - return v.tostring() + return ExtType(0, v.tostring()) def unconvert(values, dtype, compress=None): + as_is_ext = isinstance(values, ExtType) and values.code == 0 + + if as_is_ext: + values = values.data + if dtype == np.object_: return np.array(values, dtype=object) - values = values.encode('latin1') + if not as_is_ext: + values = values.encode('latin1') if compress == 'zlib': import zlib @@ -558,19 +564,23 @@ def create_block(b): def pack(o, default=encode, - encoding='latin1', unicode_errors='strict', use_single_float=False): + encoding='latin1', unicode_errors='strict', use_single_float=False, + autoreset=1, use_bin_type=1): """ Pack an object and return the packed bytes. """ return Packer(default=default, encoding=encoding, unicode_errors=unicode_errors, - use_single_float=use_single_float).pack(o) + use_single_float=use_single_float, + autoreset=autoreset, + use_bin_type=use_bin_type).pack(o) def unpack(packed, object_hook=decode, list_hook=None, use_list=False, encoding='latin1', - unicode_errors='strict', object_pairs_hook=None): + unicode_errors='strict', object_pairs_hook=None, + max_buffer_size=0, ext_hook=ExtType): """ Unpack a packed object, return an iterator Note: packed lists will be returned as tuples @@ -580,7 +590,9 @@ def unpack(packed, object_hook=decode, list_hook=list_hook, use_list=use_list, encoding=encoding, unicode_errors=unicode_errors, - object_pairs_hook=object_pairs_hook) + object_pairs_hook=object_pairs_hook, + max_buffer_size=max_buffer_size, + ext_hook=ext_hook) class Packer(_Packer): @@ -588,11 +600,15 @@ class Packer(_Packer): def __init__(self, default=encode, encoding='latin1', unicode_errors='strict', - use_single_float=False): + use_single_float=False, + autoreset=1, + use_bin_type=1): super(Packer, self).__init__(default=default, encoding=encoding, unicode_errors=unicode_errors, - use_single_float=use_single_float) + use_single_float=use_single_float, + autoreset=autoreset, + use_bin_type=use_bin_type) class Unpacker(_Unpacker): @@ -600,7 +616,7 @@ class Unpacker(_Unpacker): def __init__(self, file_like=None, read_size=0, use_list=False, object_hook=decode, object_pairs_hook=None, list_hook=None, encoding='latin1', - unicode_errors='strict', max_buffer_size=0): + unicode_errors='strict', max_buffer_size=0, ext_hook=ExtType): super(Unpacker, self).__init__(file_like=file_like, read_size=read_size, use_list=use_list, @@ -609,7 +625,8 @@ def __init__(self, file_like=None, read_size=0, use_list=False, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors, - max_buffer_size=max_buffer_size) + max_buffer_size=max_buffer_size, + ext_hook=ext_hook) class Iterator(object): diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index b71fd32a29e1e..1267821086d61 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -54,9 +54,9 @@ def setUp(self): def tearDown(self): pass - def encode_decode(self, x, **kwargs): + def encode_decode(self, x, compress=None, **kwargs): with ensure_clean(self.path) as p: - to_msgpack(p, x, **kwargs) + to_msgpack(p, x, compress=compress, **kwargs) return read_msgpack(p, **kwargs) class TestAPI(TestPackers): @@ -517,12 +517,38 @@ def test_compression_blosc(self): assert_frame_equal(self.frame[k], i_rec[k]) +class TestEncoding(TestPackers): + def setUp(self): + super(TestEncoding, self).setUp() + data = { + 'A': [compat.u('\u2019')] * 1000, + 'B': np.arange(1000, dtype=np.int32), + 'C': list(100 * 'abcdefghij'), + 'D': date_range(datetime.datetime(2015, 4, 1), periods=1000), + 'E': [datetime.timedelta(days=x) for x in range(1000)], + 'G': [400] * 1000 + } + self.frame = { + 'float': DataFrame(dict((k, data[k]) for k in ['A', 'A'])), + 'int': DataFrame(dict((k, data[k]) for k in ['B', 'B'])), + 'mixed': DataFrame(data), + } + self.utf_encodings = ['utf8', 'utf16', 'utf32'] + + def test_utf(self): + # GH10581 + for encoding in self.utf_encodings: + for frame in compat.itervalues(self.frame): + result = self.encode_decode(frame, encoding=encoding) + assert_frame_equal(result, frame) + + class TestMsgpack(): """ How to add msgpack tests: 1. Install pandas version intended to output the msgpack. - +TestPackers 2. Execute "generate_legacy_storage_files.py" to create the msgpack. $ python generate_legacy_storage_files.py msgpack diff --git a/pandas/msgpack.pyx b/pandas/msgpack.pyx deleted file mode 100644 index 625ac55ee832c..0000000000000 --- a/pandas/msgpack.pyx +++ /dev/null @@ -1,669 +0,0 @@ -# coding: utf-8 -#cython: embedsignature=True -#cython: profile=False - -from cpython cimport * -cdef extern from "Python.h": - ctypedef char* const_char_ptr "const char*" - ctypedef char* const_void_ptr "const void*" - ctypedef struct PyObject - cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1 - -from libc.stdlib cimport * -from libc.string cimport * -from libc.limits cimport * - -import cython -import numpy as np -from numpy cimport * - -class UnpackException(IOError): - pass - - -class BufferFull(UnpackException): - pass - - -class OutOfData(UnpackException): - pass - - -class UnpackValueError(UnpackException, ValueError): - pass - - -class ExtraData(ValueError): - def __init__(self, unpacked, extra): - self.unpacked = unpacked - self.extra = extra - - def __str__(self): - return "unpack(b) recieved extra data." - -class PackException(IOError): - pass - -class PackValueError(PackException, ValueError): - pass - -cdef extern from "msgpack/unpack.h": - ctypedef struct msgpack_user: - bint use_list - PyObject* object_hook - bint has_pairs_hook # call object_hook with k-v pairs - PyObject* list_hook - char *encoding - char *unicode_errors - - ctypedef struct template_context: - msgpack_user user - PyObject* obj - size_t count - unsigned int ct - PyObject* key - - ctypedef int (*execute_fn)(template_context* ctx, const_char_ptr data, - size_t len, size_t* off) except? -1 - execute_fn template_construct - execute_fn template_skip - execute_fn read_array_header - execute_fn read_map_header - void template_init(template_context* ctx) - object template_data(template_context* ctx) - -cdef extern from "msgpack/pack.h": - struct msgpack_packer: - char* buf - size_t length - size_t buf_size - - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - -cdef int DEFAULT_RECURSE_LIMIT=511 - - - -cdef class Packer(object): - """MessagePack Packer - - usage: - - packer = Packer() - astream.write(packer.pack(a)) - astream.write(packer.pack(b)) - - Packer's constructor has some keyword arguments: - - * *defaut* - Convert user type to builtin type that Packer supports. - See also simplejson's document. - * *encoding* - Convert unicode to bytes with this encoding. (default: 'utf-8') - * *unicode_errors* - Error handler for encoding unicode. (default: 'strict') - * *use_single_float* - Use single precision float type for float. (default: False) - * *autoreset* - Reset buffer after each pack and return its content as `bytes`. (default: True). - If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. - """ - cdef msgpack_packer pk - cdef object _default - cdef object _bencoding - cdef object _berrors - cdef char *encoding - cdef char *unicode_errors - cdef bool use_float - cdef bint autoreset - - def __cinit__(self): - cdef int buf_size = 1024*1024 - self.pk.buf = malloc(buf_size); - if self.pk.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - self.pk.buf_size = buf_size - self.pk.length = 0 - - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, bint autoreset=1): - self.use_float = use_single_float - self.autoreset = autoreset - if default is not None: - if not PyCallable_Check(default): - raise TypeError("default must be a callable.") - self._default = default - if encoding is None: - self.encoding = NULL - self.unicode_errors = NULL - else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.unicode_errors = PyBytes_AsString(self._berrors) - - def __dealloc__(self): - free(self.pk.buf); - - @cython.boundscheck(False) - @cython.wraparound(False) - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: - cdef long long llval - cdef unsigned long long ullval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d - cdef object dtype - - cdef int n,i - - if nest_limit < 0: - raise PackValueError("recursion limit exceeded.") - - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) - else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode unicode string: no encoding is specified") - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyDict_CheckExact(o): - d = o - ret = msgpack_pack_map(&self.pk, len(d)) - if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyDict_Check(o): - ret = msgpack_pack_map(&self.pk, len(o)) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyTuple_Check(o) or PyList_Check(o): - ret = msgpack_pack_array(&self.pk, len(o)) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - - elif self._default: - o = self._default(o) - ret = self._pack(o, nest_limit-1) - else: - raise TypeError("can't serialize %r" % (o,)) - return ret - - cpdef pack(self, object obj): - cdef int ret - ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) - if ret == -1: - raise MemoryError - elif ret: # should not happen. - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_array_header(self, size_t size): - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_map_header(self, size_t size): - cdef int ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_map_pairs(self, object pairs): - """ - Pack *pairs* as msgpack map type. - - *pairs* should sequence of pair. - (`len(pairs)` and `for k, v in *pairs*:` should be supported.) - """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def reset(self): - """Clear internal buffer.""" - self.pk.length = 0 - - def bytes(self): - """Return buffer content.""" - return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - - - cdef inline pack_pair(self, object k, object v, int nest_limit): - ret = self._pack(k, nest_limit-1) - if ret != 0: raise PackException("cannot pack : %s" % k) - ret = self._pack(v, nest_limit-1) - if ret != 0: raise PackException("cannot pack : %s" % v) - return ret - -def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'): - """ - pack an object `o` and write it to stream).""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) - stream.write(packer.pack(o)) - -def packb(object o, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False): - """ - pack o and return packed bytes.""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors, - use_single_float=use_single_float) - return packer.pack(o) - - -cdef inline init_ctx(template_context *ctx, - object object_hook, object object_pairs_hook, object list_hook, - bint use_list, char* encoding, char* unicode_errors): - template_init(ctx) - ctx.user.use_list = use_list - ctx.user.object_hook = ctx.user.list_hook = NULL - - if object_hook is not None and object_pairs_hook is not None: - raise ValueError("object_pairs_hook and object_hook are mutually exclusive.") - - if object_hook is not None: - if not PyCallable_Check(object_hook): - raise TypeError("object_hook must be a callable.") - ctx.user.object_hook = object_hook - - if object_pairs_hook is None: - ctx.user.has_pairs_hook = False - else: - if not PyCallable_Check(object_pairs_hook): - raise TypeError("object_pairs_hook must be a callable.") - ctx.user.object_hook = object_pairs_hook - ctx.user.has_pairs_hook = True - - if list_hook is not None: - if not PyCallable_Check(list_hook): - raise TypeError("list_hook must be a callable.") - ctx.user.list_hook = list_hook - - ctx.user.encoding = encoding - ctx.user.unicode_errors = unicode_errors - -def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, - ): - """Unpack packed_bytes to object. Returns an unpacked object. - - Raises `ValueError` when `packed` contains extra bytes. - """ - cdef template_context ctx - cdef size_t off = 0 - cdef int ret - - cdef char* buf - cdef Py_ssize_t buf_len - cdef char* cenc = NULL - cdef char* cerr = NULL - - PyObject_AsReadBuffer(packed, &buf, &buf_len) - - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) - - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) - ret = template_construct(&ctx, buf, buf_len, &off) - if ret == 1: - obj = template_data(&ctx) - if off < buf_len: - raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) - return obj - elif ret < 0: - raise ValueError("Unpack failed: error = %d" % (ret,)) - else: - raise UnpackValueError - - -def unpack(object stream, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, - ): - """Unpack an object from `stream`. - - Raises `ValueError` when `stream` has extra bytes. - """ - return unpackb(stream.read(), use_list=use_list, - object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, - encoding=encoding, unicode_errors=unicode_errors, - ) - - -cdef class Unpacker(object): - """ - Streaming unpacker. - - `file_like` is a file-like object having `.read(n)` method. - When `Unpacker` initialized with `file_like`, unpacker reads serialized data - from it and `.feed()` method is not usable. - - `read_size` is used as `file_like.read(read_size)`. - (default: min(1024**2, max_buffer_size)) - - If `use_list` is true (default), msgpack list is deserialized to Python list. - Otherwise, it is deserialized to Python tuple. - - `object_hook` is same to simplejson. If it is not None, it should be callable - and Unpacker calls it with a dict argument after deserializing a map. - - `object_pairs_hook` is same to simplejson. If it is not None, it should be callable - and Unpacker calls it with a list of key-value pairs after deserializing a map. - - `encoding` is encoding used for decoding msgpack bytes. If it is None (default), - msgpack bytes is deserialized to Python bytes. - - `unicode_errors` is used for decoding bytes. - - `max_buffer_size` limits size of data waiting unpacked. - 0 means system's INT_MAX (default). - Raises `BufferFull` exception when it is insufficient. - You shoud set this parameter when unpacking data from untrasted source. - - example of streaming deserialize from file-like object:: - - unpacker = Unpacker(file_like) - for o in unpacker: - do_something(o) - - example of streaming deserialize from socket:: - - unpacker = Unpacker() - while 1: - buf = sock.recv(1024**2) - if not buf: - break - unpacker.feed(buf) - for o in unpacker: - do_something(o) - """ - cdef template_context ctx - cdef char* buf - cdef size_t buf_size, buf_head, buf_tail - cdef object file_like - cdef object file_like_read - cdef Py_ssize_t read_size - cdef object object_hook - cdef object encoding, unicode_errors - cdef size_t max_buffer_size - - def __cinit__(self): - self.buf = NULL - - def __dealloc__(self): - free(self.buf) - self.buf = NULL - - def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, - object object_hook=None, object object_pairs_hook=None, object list_hook=None, - encoding=None, unicode_errors='strict', int max_buffer_size=0, - ): - cdef char *cenc=NULL, *cerr=NULL - - self.file_like = file_like - if file_like: - self.file_like_read = file_like.read - if not PyCallable_Check(self.file_like_read): - raise ValueError("`file_like.read` must be a callable.") - if not max_buffer_size: - max_buffer_size = INT_MAX - if read_size > max_buffer_size: - raise ValueError("read_size should be less or equal to max_buffer_size") - if not read_size: - read_size = min(max_buffer_size, 1024**2) - self.max_buffer_size = max_buffer_size - self.read_size = read_size - self.buf = malloc(read_size) - if self.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - self.buf_size = read_size - self.buf_head = 0 - self.buf_tail = 0 - - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - self.encoding = encoding - cenc = PyBytes_AsString(encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - self.unicode_errors = unicode_errors - cerr = PyBytes_AsString(unicode_errors) - - init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) - - def feed(self, object next_bytes): - """Append `next_bytes` to internal buffer.""" - cdef char* buf - cdef Py_ssize_t buf_len - if self.file_like is not None: - raise TypeError( - "unpacker.feed() is not be able to use with `file_like`.") - PyObject_AsReadBuffer(next_bytes, &buf, &buf_len) - self.append_buffer(buf, buf_len) - - cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): - cdef: - char* buf = self.buf - char* new_buf - size_t head = self.buf_head - size_t tail = self.buf_tail - size_t buf_size = self.buf_size - size_t new_size - - if tail + _buf_len > buf_size: - if ((tail - head) + _buf_len) <= buf_size: - # move to front. - memmove(buf, buf + head, tail - head) - tail -= head - head = 0 - else: - # expand buffer. - new_size = (tail-head) + _buf_len - if new_size > self.max_buffer_size: - raise BufferFull - new_size = min(new_size*2, self.max_buffer_size) - new_buf = malloc(new_size) - if new_buf == NULL: - # self.buf still holds old buffer and will be freed during - # obj destruction - raise MemoryError("Unable to enlarge internal buffer.") - memcpy(new_buf, buf + head, tail - head) - free(buf) - - buf = new_buf - buf_size = new_size - tail -= head - head = 0 - - memcpy(buf + tail, (_buf), _buf_len) - self.buf = buf - self.buf_head = head - self.buf_size = buf_size - self.buf_tail = tail + _buf_len - - cdef read_from_file(self): - next_bytes = self.file_like_read( - min(self.read_size, - self.max_buffer_size - (self.buf_tail - self.buf_head) - )) - if next_bytes: - self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) - else: - self.file_like = None - - cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): - cdef int ret - cdef object obj - cdef size_t prev_head - while 1: - prev_head = self.buf_head - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) - - if ret == 1: - obj = template_data(&self.ctx) - template_init(&self.ctx) - return obj - elif ret == 0: - if self.file_like is not None: - self.read_from_file() - continue - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") - else: - raise ValueError("Unpack failed: error = %d" % (ret,)) - - def read_bytes(self, Py_ssize_t nbytes): - """read a specified number of raw bytes from the stream""" - cdef size_t nread - nread = min(self.buf_tail - self.buf_head, nbytes) - ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) - self.buf_head += nread - if len(ret) < nbytes and self.file_like is not None: - ret += self.file_like.read(nbytes - len(ret)) - return ret - - def unpack(self, object write_bytes=None): - """ - unpack one object - - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(template_construct, write_bytes) - - def skip(self, object write_bytes=None): - """ - read and ignore one object, returning None - - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(template_skip, write_bytes) - - def read_array_header(self, object write_bytes=None): - """assuming the next object is an array, return its size n, such that - the next n unpack() calls will iterate over its contents. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(read_array_header, write_bytes) - - def read_map_header(self, object write_bytes=None): - """assuming the next object is a map, return its size n, such that the - next n * 2 unpack() calls will iterate over its key-value pairs. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(read_map_header, write_bytes) - - def __iter__(self): - return self - - def __next__(self): - return self._unpack(template_construct, None, 1) - - # for debug. - #def _buf(self): - # return PyString_FromStringAndSize(self.buf, self.buf_tail) - - #def _off(self): - # return self.buf_head diff --git a/pandas/msgpack/__init__.py b/pandas/msgpack/__init__.py new file mode 100644 index 0000000000000..bf0e2853ae131 --- /dev/null +++ b/pandas/msgpack/__init__.py @@ -0,0 +1,49 @@ +# coding: utf-8 +from pandas.msgpack._version import version +from pandas.msgpack.exceptions import * + +from collections import namedtuple + + +class ExtType(namedtuple('ExtType', 'code data')): + """ExtType represents ext type in msgpack.""" + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super(ExtType, cls).__new__(cls, code, data) + + +import os +from pandas.msgpack._packer import Packer +from pandas.msgpack._unpacker import unpack, unpackb, Unpacker + + + +def pack(o, stream, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. + """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + + +def packb(o, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. + """ + return Packer(**kwargs).pack(o) + +# alias for compatibility to simplejson/marshal/pickle. +load = unpack +loads = unpackb + +dump = pack +dumps = packb diff --git a/pandas/msgpack/_packer.pyx b/pandas/msgpack/_packer.pyx new file mode 100644 index 0000000000000..5004b9e8e7262 --- /dev/null +++ b/pandas/msgpack/_packer.pyx @@ -0,0 +1,294 @@ +# coding: utf-8 +#cython: embedsignature=True + +from cpython cimport * +from libc.stdlib cimport * +from libc.string cimport * +from libc.limits cimport * + +from pandas.msgpack.exceptions import PackValueError +from pandas.msgpack import ExtType + + +cdef extern from "../src/msgpack/pack.h": + struct msgpack_packer: + char* buf + size_t length + size_t buf_size + bint use_bin_type + + int msgpack_pack_int(msgpack_packer* pk, int d) + int msgpack_pack_nil(msgpack_packer* pk) + int msgpack_pack_true(msgpack_packer* pk) + int msgpack_pack_false(msgpack_packer* pk) + int msgpack_pack_long(msgpack_packer* pk, long d) + int msgpack_pack_long_long(msgpack_packer* pk, long long d) + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) + int msgpack_pack_float(msgpack_packer* pk, float d) + int msgpack_pack_double(msgpack_packer* pk, double d) + int msgpack_pack_array(msgpack_packer* pk, size_t l) + int msgpack_pack_map(msgpack_packer* pk, size_t l) + int msgpack_pack_raw(msgpack_packer* pk, size_t l) + int msgpack_pack_bin(msgpack_packer* pk, size_t l) + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) + +cdef int DEFAULT_RECURSE_LIMIT=511 + + +cdef class Packer(object): + """ + MessagePack Packer + + usage:: + + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) + + Packer's constructor has some keyword arguments: + + :param callable default: + Convert user type to builtin type that Packer supports. + See also simplejson's document. + :param str encoding: + Convert unicode to bytes with this encoding. (default: 'utf-8') + :param str unicode_errors: + Error handler for encoding unicode. (default: 'strict') + :param bool use_single_float: + Use single precision float type for float. (default: False) + :param bool autoreset: + Reset buffer after each pack and return it's content as `bytes`. (default: True). + If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + :param bool use_bin_type: + Use bin type introduced in msgpack spec 2.0 for bytes. + It also enable str8 type for unicode. + """ + cdef msgpack_packer pk + cdef object _default + cdef object _bencoding + cdef object _berrors + cdef char *encoding + cdef char *unicode_errors + cdef bool use_float + cdef bint autoreset + + def __cinit__(self): + cdef int buf_size = 1024*1024 + self.pk.buf = malloc(buf_size); + if self.pk.buf == NULL: + raise MemoryError("Unable to allocate internal buffer.") + self.pk.buf_size = buf_size + self.pk.length = 0 + + def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + use_single_float=False, bint autoreset=1, bint use_bin_type=0): + """ + """ + self.use_float = use_single_float + self.autoreset = autoreset + self.pk.use_bin_type = use_bin_type + if default is not None: + if not PyCallable_Check(default): + raise TypeError("default must be a callable.") + self._default = default + if encoding is None: + self.encoding = NULL + self.unicode_errors = NULL + else: + if isinstance(encoding, unicode): + self._bencoding = encoding.encode('ascii') + else: + self._bencoding = encoding + self.encoding = PyBytes_AsString(self._bencoding) + if isinstance(unicode_errors, unicode): + self._berrors = unicode_errors.encode('ascii') + else: + self._berrors = unicode_errors + self.unicode_errors = PyBytes_AsString(self._berrors) + + def __dealloc__(self): + free(self.pk.buf); + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef long long llval + cdef unsigned long long ullval + cdef long longval + cdef float fval + cdef double dval + cdef char* rawval + cdef int ret + cdef dict d + cdef size_t L + cdef int default_used = 0 + + if nest_limit < 0: + raise PackValueError("recursion limit exceeded.") + + while True: + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif isinstance(o, bool): + if o: + ret = msgpack_pack_true(&self.pk) + else: + ret = msgpack_pack_false(&self.pk) + elif PyLong_Check(o): + # PyInt_Check(long) is True for Python 3. + # Sow we should test long before int. + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + elif PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytes_Check(o): + L = len(o) + if L > (2**32)-1: + raise ValueError("bytes is too large") + rawval = o + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_Check(o): + if not self.encoding: + raise TypeError("Can't encode unicode string: no encoding is specified") + o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) + L = len(o) + if L > (2**32)-1: + raise ValueError("dict is too large") + rawval = o + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif PyDict_CheckExact(o): + d = o + L = len(d) + if L > (2**32)-1: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyDict_Check(o): + L = len(o) + if L > (2**32)-1: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + longval = o.code + rawval = o.data + L = len(o.data) + if L > (2**32)-1: + raise ValueError("EXT data is too large") + ret = msgpack_pack_ext(&self.pk, longval, L) + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyTuple_Check(o) or PyList_Check(o): + L = len(o) + if L > (2**32)-1: + raise ValueError("list is too large") + ret = msgpack_pack_array(&self.pk, L) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif not default_used and self._default: + o = self._default(o) + default_used = 1 + continue + else: + raise TypeError("can't serialize %r" % (o,)) + return ret + + cpdef pack(self, object obj): + cdef int ret + ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) + if ret == -1: + raise MemoryError + elif ret: # should not happen. + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def pack_ext_type(self, typecode, data): + msgpack_pack_ext(&self.pk, typecode, len(data)) + msgpack_pack_raw_body(&self.pk, data, len(data)) + + def pack_array_header(self, size_t size): + if size > (2**32-1): + raise ValueError + cdef int ret = msgpack_pack_array(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def pack_map_header(self, size_t size): + if size > (2**32-1): + raise ValueError + cdef int ret = msgpack_pack_map(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def pack_map_pairs(self, object pairs): + """ + Pack *pairs* as msgpack map type. + + *pairs* should sequence of pair. + (`len(pairs)` and `for k, v in pairs:` should be supported.) + """ + cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) + if ret == 0: + for k, v in pairs: + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def reset(self): + """Clear internal buffer.""" + self.pk.length = 0 + + def bytes(self): + """Return buffer content.""" + return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) diff --git a/pandas/msgpack/_unpacker.pyx b/pandas/msgpack/_unpacker.pyx new file mode 100644 index 0000000000000..f68bf3369427c --- /dev/null +++ b/pandas/msgpack/_unpacker.pyx @@ -0,0 +1,466 @@ +# coding: utf-8 +#cython: embedsignature=True + +from cpython cimport * +cdef extern from "Python.h": + ctypedef struct PyObject + cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 + +from libc.stdlib cimport * +from libc.string cimport * +from libc.limits cimport * + +from pandas.msgpack.exceptions import ( + BufferFull, + OutOfData, + UnpackValueError, + ExtraData, + ) +from pandas.msgpack import ExtType + + +cdef extern from "../src/msgpack/unpack.h": + ctypedef struct msgpack_user: + bint use_list + PyObject* object_hook + bint has_pairs_hook # call object_hook with k-v pairs + PyObject* list_hook + PyObject* ext_hook + char *encoding + char *unicode_errors + Py_ssize_t max_str_len + Py_ssize_t max_bin_len + Py_ssize_t max_array_len + Py_ssize_t max_map_len + Py_ssize_t max_ext_len + + ctypedef struct unpack_context: + msgpack_user user + PyObject* obj + size_t count + + ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, + size_t len, size_t* off) except? -1 + execute_fn unpack_construct + execute_fn unpack_skip + execute_fn read_array_header + execute_fn read_map_header + void unpack_init(unpack_context* ctx) + object unpack_data(unpack_context* ctx) + +cdef inline init_ctx(unpack_context *ctx, + object object_hook, object object_pairs_hook, + object list_hook, object ext_hook, + bint use_list, char* encoding, char* unicode_errors, + Py_ssize_t max_str_len, Py_ssize_t max_bin_len, + Py_ssize_t max_array_len, Py_ssize_t max_map_len, + Py_ssize_t max_ext_len): + unpack_init(ctx) + ctx.user.use_list = use_list + ctx.user.object_hook = ctx.user.list_hook = NULL + ctx.user.max_str_len = max_str_len + ctx.user.max_bin_len = max_bin_len + ctx.user.max_array_len = max_array_len + ctx.user.max_map_len = max_map_len + ctx.user.max_ext_len = max_ext_len + + if object_hook is not None and object_pairs_hook is not None: + raise TypeError("object_pairs_hook and object_hook are mutually exclusive.") + + if object_hook is not None: + if not PyCallable_Check(object_hook): + raise TypeError("object_hook must be a callable.") + ctx.user.object_hook = object_hook + + if object_pairs_hook is None: + ctx.user.has_pairs_hook = False + else: + if not PyCallable_Check(object_pairs_hook): + raise TypeError("object_pairs_hook must be a callable.") + ctx.user.object_hook = object_pairs_hook + ctx.user.has_pairs_hook = True + + if list_hook is not None: + if not PyCallable_Check(list_hook): + raise TypeError("list_hook must be a callable.") + ctx.user.list_hook = list_hook + + if ext_hook is not None: + if not PyCallable_Check(ext_hook): + raise TypeError("ext_hook must be a callable.") + ctx.user.ext_hook = ext_hook + + ctx.user.encoding = encoding + ctx.user.unicode_errors = unicode_errors + +def default_read_extended_type(typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + +def unpackb(object packed, object object_hook=None, object list_hook=None, + bint use_list=1, encoding=None, unicode_errors="strict", + object_pairs_hook=None, ext_hook=ExtType, + Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_bin_len=2147483647, + Py_ssize_t max_array_len=2147483647, + Py_ssize_t max_map_len=2147483647, + Py_ssize_t max_ext_len=2147483647): + """ + Unpack packed_bytes to object. Returns an unpacked object. + + Raises `ValueError` when `packed` contains extra bytes. + + See :class:`Unpacker` for options. + """ + cdef unpack_context ctx + cdef size_t off = 0 + cdef int ret + + cdef char* buf + cdef Py_ssize_t buf_len + cdef char* cenc = NULL + cdef char* cerr = NULL + + PyObject_AsReadBuffer(packed, &buf, &buf_len) + + if encoding is not None: + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + cenc = PyBytes_AsString(encoding) + + if unicode_errors is not None: + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + cerr = PyBytes_AsString(unicode_errors) + + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr, + max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + ret = unpack_construct(&ctx, buf, buf_len, &off) + if ret == 1: + obj = unpack_data(&ctx) + if off < buf_len: + raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) + return obj + else: + raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + + +def unpack(object stream, object object_hook=None, object list_hook=None, + bint use_list=1, encoding=None, unicode_errors="strict", + object_pairs_hook=None, + ): + """ + Unpack an object from `stream`. + + Raises `ValueError` when `stream` has extra bytes. + + See :class:`Unpacker` for options. + """ + return unpackb(stream.read(), use_list=use_list, + object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, + encoding=encoding, unicode_errors=unicode_errors, + ) + + +cdef class Unpacker(object): + """Streaming unpacker. + + arguments: + + :param file_like: + File-like object having `.read(n)` method. + If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + + :param int read_size: + Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) + + :param bool use_list: + If true, unpack msgpack array to Python list. + Otherwise, unpack to Python tuple. (default: True) + + :param callable object_hook: + When specified, it should be callable. + Unpacker calls it with a dict argument after unpacking msgpack map. + (See also simplejson) + + :param callable object_pairs_hook: + When specified, it should be callable. + Unpacker calls it with a list of key-value pairs after unpacking msgpack map. + (See also simplejson) + + :param str encoding: + Encoding used for decoding msgpack raw. + If it is None (default), msgpack raw is deserialized to Python bytes. + + :param str unicode_errors: + Used for decoding msgpack raw with *encoding*. + (default: `'strict'`) + + :param int max_buffer_size: + Limits size of data waiting unpacked. 0 means system's INT_MAX (default). + Raises `BufferFull` exception when it is insufficient. + You shoud set this parameter when unpacking data from untrasted source. + + :param int max_str_len: + Limits max length of str. (default: 2**31-1) + + :param int max_bin_len: + Limits max length of bin. (default: 2**31-1) + + :param int max_array_len: + Limits max length of array. (default: 2**31-1) + + :param int max_map_len: + Limits max length of map. (default: 2**31-1) + + + example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + """ + cdef unpack_context ctx + cdef char* buf + cdef size_t buf_size, buf_head, buf_tail + cdef object file_like + cdef object file_like_read + cdef Py_ssize_t read_size + # To maintain refcnt. + cdef object object_hook, object_pairs_hook, list_hook, ext_hook + cdef object encoding, unicode_errors + cdef size_t max_buffer_size + + def __cinit__(self): + self.buf = NULL + + def __dealloc__(self): + free(self.buf) + self.buf = NULL + + def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, + object object_hook=None, object object_pairs_hook=None, object list_hook=None, + encoding=None, unicode_errors='strict', int max_buffer_size=0, + object ext_hook=ExtType, + Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_bin_len=2147483647, + Py_ssize_t max_array_len=2147483647, + Py_ssize_t max_map_len=2147483647, + Py_ssize_t max_ext_len=2147483647): + cdef char *cenc=NULL, + cdef char *cerr=NULL + + self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook + self.list_hook = list_hook + self.ext_hook = ext_hook + + self.file_like = file_like + if file_like: + self.file_like_read = file_like.read + if not PyCallable_Check(self.file_like_read): + raise TypeError("`file_like.read` must be a callable.") + if not max_buffer_size: + max_buffer_size = INT_MAX + if read_size > max_buffer_size: + raise ValueError("read_size should be less or equal to max_buffer_size") + if not read_size: + read_size = min(max_buffer_size, 1024**2) + self.max_buffer_size = max_buffer_size + self.read_size = read_size + self.buf = malloc(read_size) + if self.buf == NULL: + raise MemoryError("Unable to allocate internal buffer.") + self.buf_size = read_size + self.buf_head = 0 + self.buf_tail = 0 + + if encoding is not None: + if isinstance(encoding, unicode): + self.encoding = encoding.encode('ascii') + elif isinstance(encoding, bytes): + self.encoding = encoding + else: + raise TypeError("encoding should be bytes or unicode") + cenc = PyBytes_AsString(self.encoding) + + if unicode_errors is not None: + if isinstance(unicode_errors, unicode): + self.unicode_errors = unicode_errors.encode('ascii') + elif isinstance(unicode_errors, bytes): + self.unicode_errors = unicode_errors + else: + raise TypeError("unicode_errors should be bytes or unicode") + cerr = PyBytes_AsString(self.unicode_errors) + + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, + ext_hook, use_list, cenc, cerr, + max_str_len, max_bin_len, max_array_len, + max_map_len, max_ext_len) + + def feed(self, object next_bytes): + """Append `next_bytes` to internal buffer.""" + cdef Py_buffer pybuff + if self.file_like is not None: + raise AssertionError( + "unpacker.feed() is not be able to use with `file_like`.") + PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE) + try: + self.append_buffer(pybuff.buf, pybuff.len) + finally: + PyBuffer_Release(&pybuff) + + cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): + cdef: + char* buf = self.buf + char* new_buf + size_t head = self.buf_head + size_t tail = self.buf_tail + size_t buf_size = self.buf_size + size_t new_size + + if tail + _buf_len > buf_size: + if ((tail - head) + _buf_len) <= buf_size: + # move to front. + memmove(buf, buf + head, tail - head) + tail -= head + head = 0 + else: + # expand buffer. + new_size = (tail-head) + _buf_len + if new_size > self.max_buffer_size: + raise BufferFull + new_size = min(new_size*2, self.max_buffer_size) + new_buf = malloc(new_size) + if new_buf == NULL: + # self.buf still holds old buffer and will be freed during + # obj destruction + raise MemoryError("Unable to enlarge internal buffer.") + memcpy(new_buf, buf + head, tail - head) + free(buf) + + buf = new_buf + buf_size = new_size + tail -= head + head = 0 + + memcpy(buf + tail, (_buf), _buf_len) + self.buf = buf + self.buf_head = head + self.buf_size = buf_size + self.buf_tail = tail + _buf_len + + cdef read_from_file(self): + next_bytes = self.file_like_read( + min(self.read_size, + self.max_buffer_size - (self.buf_tail - self.buf_head) + )) + if next_bytes: + self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) + else: + self.file_like = None + + cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): + cdef int ret + cdef object obj + cdef size_t prev_head + + if self.buf_head >= self.buf_tail and self.file_like is not None: + self.read_from_file() + + while 1: + prev_head = self.buf_head + if prev_head >= self.buf_tail: + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") + + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + if write_bytes is not None: + write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) + + if ret == 1: + obj = unpack_data(&self.ctx) + unpack_init(&self.ctx) + return obj + elif ret == 0: + if self.file_like is not None: + self.read_from_file() + continue + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") + else: + raise ValueError("Unpack failed: error = %d" % (ret,)) + + def read_bytes(self, Py_ssize_t nbytes): + """Read a specified number of raw bytes from the stream""" + cdef size_t nread + nread = min(self.buf_tail - self.buf_head, nbytes) + ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) + self.buf_head += nread + if len(ret) < nbytes and self.file_like is not None: + ret += self.file_like.read(nbytes - len(ret)) + return ret + + def unpack(self, object write_bytes=None): + """Unpack one object + + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(unpack_construct, write_bytes) + + def skip(self, object write_bytes=None): + """Read and ignore one object, returning None + + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(unpack_skip, write_bytes) + + def read_array_header(self, object write_bytes=None): + """assuming the next object is an array, return its size n, such that + the next n unpack() calls will iterate over its contents. + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(read_array_header, write_bytes) + + def read_map_header(self, object write_bytes=None): + """assuming the next object is a map, return its size n, such that the + next n * 2 unpack() calls will iterate over its key-value pairs. + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(read_map_header, write_bytes) + + def __iter__(self): + return self + + def __next__(self): + return self._unpack(unpack_construct, None, 1) + + # for debug. + #def _buf(self): + # return PyString_FromStringAndSize(self.buf, self.buf_tail) + + #def _off(self): + # return self.buf_head diff --git a/pandas/msgpack/_version.py b/pandas/msgpack/_version.py new file mode 100644 index 0000000000000..2c1c96c0759a1 --- /dev/null +++ b/pandas/msgpack/_version.py @@ -0,0 +1 @@ +version = (0, 4, 6) diff --git a/pandas/msgpack/exceptions.py b/pandas/msgpack/exceptions.py new file mode 100644 index 0000000000000..f7678f135bd26 --- /dev/null +++ b/pandas/msgpack/exceptions.py @@ -0,0 +1,29 @@ +class UnpackException(Exception): + pass + + +class BufferFull(UnpackException): + pass + + +class OutOfData(UnpackException): + pass + + +class UnpackValueError(UnpackException, ValueError): + pass + + +class ExtraData(ValueError): + def __init__(self, unpacked, extra): + self.unpacked = unpacked + self.extra = extra + + def __str__(self): + return "unpack(b) received extra data." + +class PackException(Exception): + pass + +class PackValueError(PackException, ValueError): + pass diff --git a/pandas/src/msgpack/pack.h b/pandas/src/msgpack/pack.h index e4c315c1161b1..02379c9188424 100644 --- a/pandas/src/msgpack/pack.h +++ b/pandas/src/msgpack/pack.h @@ -34,18 +34,18 @@ typedef struct msgpack_packer { char *buf; size_t length; size_t buf_size; + bool use_bin_type; } msgpack_packer; typedef struct Packer Packer; -static inline int msgpack_pack_short(msgpack_packer* pk, short d); static inline int msgpack_pack_int(msgpack_packer* pk, int d); static inline int msgpack_pack_long(msgpack_packer* pk, long d); static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); -static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); +//static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); @@ -68,8 +68,11 @@ static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); +static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); +static inline int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l); + static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; @@ -90,14 +93,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ return 0; } -#define msgpack_pack_inline_func(name) \ - static inline int msgpack_pack ## name - -#define msgpack_pack_inline_func_cint(name) \ - static inline int msgpack_pack ## name - -#define msgpack_pack_user msgpack_packer* - #define msgpack_pack_append_buffer(user, buf, len) \ return msgpack_pack_write(user, (const char*)buf, len) diff --git a/pandas/src/msgpack/pack_template.h b/pandas/src/msgpack/pack_template.h index 65c959dd8ce63..5d1088f4b7d78 100644 --- a/pandas/src/msgpack/pack_template.h +++ b/pandas/src/msgpack/pack_template.h @@ -28,14 +28,6 @@ #define TAKE8_64(d) ((uint8_t*)&d)[7] #endif -#ifndef msgpack_pack_inline_func -#error msgpack_pack_inline_func template is not defined -#endif - -#ifndef msgpack_pack_user -#error msgpack_pack_user type is not defined -#endif - #ifndef msgpack_pack_append_buffer #error msgpack_pack_append_buffer callback is not defined #endif @@ -47,584 +39,524 @@ #define msgpack_pack_real_uint8(x, d) \ do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ } while(0) #define msgpack_pack_real_uint16(x, d) \ do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ - } else if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ + } else if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ } while(0) #define msgpack_pack_real_uint32(x, d) \ do { \ - if(d < (1<<8)) { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else { \ - if(d < (1<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } \ + if(d < (1<<8)) { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else { \ + if(d < (1<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } \ } while(0) #define msgpack_pack_real_uint64(x, d) \ do { \ - if(d < (1ULL<<8)) { \ - if(d < (1ULL<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else { \ - if(d < (1ULL<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else if(d < (1ULL<<32)) { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else { \ - /* unsigned 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } \ - } \ + if(d < (1ULL<<8)) { \ + if(d < (1ULL<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else { \ + if(d < (1ULL<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else if(d < (1ULL<<32)) { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else { \ + /* unsigned 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } \ + } \ } while(0) #define msgpack_pack_real_int8(x, d) \ do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ + if(d < -(1<<5)) { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } \ } while(0) #define msgpack_pack_real_int16(x, d) \ do { \ - if(d < -(1<<5)) { \ - if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ - } else { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ - } \ + if(d < -(1<<5)) { \ + if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ + } else { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ + } \ } while(0) #define msgpack_pack_real_int32(x, d) \ do { \ - if(d < -(1<<5)) { \ - if(d < -(1<<15)) { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ - } else { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else if(d < (1<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } \ + if(d < -(1<<5)) { \ + if(d < -(1<<15)) { \ + /* signed 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ + } else { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else if(d < (1<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } \ } while(0) #define msgpack_pack_real_int64(x, d) \ do { \ - if(d < -(1LL<<5)) { \ - if(d < -(1LL<<15)) { \ - if(d < -(1LL<<31)) { \ - /* signed 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xd3; _msgpack_store64(&buf[1], d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } else { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } else { \ - if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ - } else { \ - if(d < (1LL<<16)) { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ - } else { \ - if(d < (1LL<<32)) { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else { \ - /* unsigned 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } \ - } \ - } \ + if(d < -(1LL<<5)) { \ + if(d < -(1LL<<15)) { \ + if(d < -(1LL<<31)) { \ + /* signed 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xd3; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } else { \ + /* signed 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } else { \ + if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ + } else { \ + if(d < (1LL<<16)) { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ + } else { \ + if(d < (1LL<<32)) { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else { \ + /* unsigned 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } \ + } \ + } \ } while(0) -#ifdef msgpack_pack_inline_func_fixint - -msgpack_pack_inline_func_fixint(_uint8)(msgpack_pack_user x, uint8_t d) -{ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; - msgpack_pack_append_buffer(x, buf, 2); -} - -msgpack_pack_inline_func_fixint(_uint16)(msgpack_pack_user x, uint16_t d) -{ - unsigned char buf[3]; - buf[0] = 0xcd; _msgpack_store16(&buf[1], d); - msgpack_pack_append_buffer(x, buf, 3); -} - -msgpack_pack_inline_func_fixint(_uint32)(msgpack_pack_user x, uint32_t d) -{ - unsigned char buf[5]; - buf[0] = 0xce; _msgpack_store32(&buf[1], d); - msgpack_pack_append_buffer(x, buf, 5); -} - -msgpack_pack_inline_func_fixint(_uint64)(msgpack_pack_user x, uint64_t d) -{ - unsigned char buf[9]; - buf[0] = 0xcf; _msgpack_store64(&buf[1], d); - msgpack_pack_append_buffer(x, buf, 9); -} - -msgpack_pack_inline_func_fixint(_int8)(msgpack_pack_user x, int8_t d) +static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) { - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; - msgpack_pack_append_buffer(x, buf, 2); + msgpack_pack_real_uint8(x, d); } -msgpack_pack_inline_func_fixint(_int16)(msgpack_pack_user x, int16_t d) +static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) { - unsigned char buf[3]; - buf[0] = 0xd1; _msgpack_store16(&buf[1], d); - msgpack_pack_append_buffer(x, buf, 3); + msgpack_pack_real_uint16(x, d); } -msgpack_pack_inline_func_fixint(_int32)(msgpack_pack_user x, int32_t d) +static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) { - unsigned char buf[5]; - buf[0] = 0xd2; _msgpack_store32(&buf[1], d); - msgpack_pack_append_buffer(x, buf, 5); + msgpack_pack_real_uint32(x, d); } -msgpack_pack_inline_func_fixint(_int64)(msgpack_pack_user x, int64_t d) +static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) { - unsigned char buf[9]; - buf[0] = 0xd3; _msgpack_store64(&buf[1], d); - msgpack_pack_append_buffer(x, buf, 9); + msgpack_pack_real_uint64(x, d); } -#undef msgpack_pack_inline_func_fixint -#endif - - -msgpack_pack_inline_func(_uint8)(msgpack_pack_user x, uint8_t d) +static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) { - msgpack_pack_real_uint8(x, d); + msgpack_pack_real_int8(x, d); } -msgpack_pack_inline_func(_uint16)(msgpack_pack_user x, uint16_t d) +static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) { - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_int16(x, d); } -msgpack_pack_inline_func(_uint32)(msgpack_pack_user x, uint32_t d) +static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) { - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_int32(x, d); } -msgpack_pack_inline_func(_uint64)(msgpack_pack_user x, uint64_t d) +static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) { - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_int64(x, d); } -msgpack_pack_inline_func(_int8)(msgpack_pack_user x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} -msgpack_pack_inline_func(_int16)(msgpack_pack_user x, int16_t d) -{ - msgpack_pack_real_int16(x, d); -} +//#ifdef msgpack_pack_inline_func_cint -msgpack_pack_inline_func(_int32)(msgpack_pack_user x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -msgpack_pack_inline_func(_int64)(msgpack_pack_user x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -#ifdef msgpack_pack_inline_func_cint - -msgpack_pack_inline_func_cint(_short)(msgpack_pack_user x, short d) +static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) #if SIZEOF_SHORT == 2 - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif SIZEOF_SHORT == 4 - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #elif defined(SHRT_MAX) #if SHRT_MAX == 0x7fff - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif SHRT_MAX == 0x7fffffff - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #else if(sizeof(short) == 2) { - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); } else if(sizeof(short) == 4) { - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); } else { - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); } #endif } -msgpack_pack_inline_func_cint(_int)(msgpack_pack_user x, int d) +static inline int msgpack_pack_int(msgpack_packer* x, int d) { #if defined(SIZEOF_INT) #if SIZEOF_INT == 2 - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif SIZEOF_INT == 4 - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #elif defined(INT_MAX) #if INT_MAX == 0x7fff - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif INT_MAX == 0x7fffffff - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #else if(sizeof(int) == 2) { - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); } else if(sizeof(int) == 4) { - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); } else { - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); } #endif } -msgpack_pack_inline_func_cint(_long)(msgpack_pack_user x, long d) +static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) #if SIZEOF_LONG == 2 - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif SIZEOF_LONG == 4 - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) #if LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif LONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #else if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); } else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); } else { - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); } #endif } -msgpack_pack_inline_func_cint(_long_long)(msgpack_pack_user x, long long d) +static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { #if defined(SIZEOF_LONG_LONG) #if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #elif defined(LLONG_MAX) #if LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); #elif LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif #else if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); } else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); } else { - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); } #endif } -msgpack_pack_inline_func_cint(_unsigned_short)(msgpack_pack_user x, unsigned short d) +static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) { #if defined(SIZEOF_SHORT) #if SIZEOF_SHORT == 2 - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif SIZEOF_SHORT == 4 - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #elif defined(USHRT_MAX) #if USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #else if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); } else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); } else { - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); } #endif } -msgpack_pack_inline_func_cint(_unsigned_int)(msgpack_pack_user x, unsigned int d) +static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) { #if defined(SIZEOF_INT) #if SIZEOF_INT == 2 - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif SIZEOF_INT == 4 - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #elif defined(UINT_MAX) #if UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #else if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); } else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); } else { - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); } #endif } -msgpack_pack_inline_func_cint(_unsigned_long)(msgpack_pack_user x, unsigned long d) +static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) { #if defined(SIZEOF_LONG) #if SIZEOF_LONG == 2 - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif SIZEOF_LONG == 4 - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #elif defined(ULONG_MAX) #if ULONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #else if(sizeof(unsigned long) == 2) { - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); } else if(sizeof(unsigned long) == 4) { - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); } else { - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); } #endif } -msgpack_pack_inline_func_cint(_unsigned_long_long)(msgpack_pack_user x, unsigned long long d) +static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { #if defined(SIZEOF_LONG_LONG) #if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #elif defined(ULLONG_MAX) #if ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); #elif ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); #endif #else if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); + msgpack_pack_real_uint16(x, d); } else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); + msgpack_pack_real_uint32(x, d); } else { - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_uint64(x, d); } #endif } -#undef msgpack_pack_inline_func_cint -#endif +//#undef msgpack_pack_inline_func_cint +//#endif @@ -632,27 +564,27 @@ if(sizeof(unsigned long long) == 2) { * Float */ -msgpack_pack_inline_func(_float)(msgpack_pack_user x, float d) +static inline int msgpack_pack_float(msgpack_packer* x, float d) { - union { float f; uint32_t i; } mem; - mem.f = d; - unsigned char buf[5]; - buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); - msgpack_pack_append_buffer(x, buf, 5); + union { float f; uint32_t i; } mem; + mem.f = d; + unsigned char buf[5]; + buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); + msgpack_pack_append_buffer(x, buf, 5); } -msgpack_pack_inline_func(_double)(msgpack_pack_user x, double d) +static inline int msgpack_pack_double(msgpack_packer* x, double d) { - union { double f; uint64_t i; } mem; - mem.f = d; - unsigned char buf[9]; - buf[0] = 0xcb; + union { double f; uint64_t i; } mem; + mem.f = d; + unsigned char buf[9]; + buf[0] = 0xcb; #if defined(__arm__) && !(__ARM_EABI__) // arm-oabi // https://github.com/msgpack/msgpack-perl/pull/1 mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); #endif _msgpack_store64(&buf[1], mem.i); - msgpack_pack_append_buffer(x, buf, 9); + msgpack_pack_append_buffer(x, buf, 9); } @@ -660,10 +592,10 @@ msgpack_pack_inline_func(_double)(msgpack_pack_user x, double d) * Nil */ -msgpack_pack_inline_func(_nil)(msgpack_pack_user x) +static inline int msgpack_pack_nil(msgpack_packer* x) { - static const unsigned char d = 0xc0; - msgpack_pack_append_buffer(x, &d, 1); + static const unsigned char d = 0xc0; + msgpack_pack_append_buffer(x, &d, 1); } @@ -671,16 +603,16 @@ msgpack_pack_inline_func(_nil)(msgpack_pack_user x) * Boolean */ -msgpack_pack_inline_func(_true)(msgpack_pack_user x) +static inline int msgpack_pack_true(msgpack_packer* x) { - static const unsigned char d = 0xc3; - msgpack_pack_append_buffer(x, &d, 1); + static const unsigned char d = 0xc3; + msgpack_pack_append_buffer(x, &d, 1); } -msgpack_pack_inline_func(_false)(msgpack_pack_user x) +static inline int msgpack_pack_false(msgpack_packer* x) { - static const unsigned char d = 0xc2; - msgpack_pack_append_buffer(x, &d, 1); + static const unsigned char d = 0xc2; + msgpack_pack_append_buffer(x, &d, 1); } @@ -688,20 +620,20 @@ msgpack_pack_inline_func(_false)(msgpack_pack_user x) * Array */ -msgpack_pack_inline_func(_array)(msgpack_pack_user x, unsigned int n) +static inline int msgpack_pack_array(msgpack_packer* x, unsigned int n) { - if(n < 16) { - unsigned char d = 0x90 | n; - msgpack_pack_append_buffer(x, &d, 1); - } else if(n < 65536) { - unsigned char buf[3]; - buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n); - msgpack_pack_append_buffer(x, buf, 5); - } + if(n < 16) { + unsigned char d = 0x90 | n; + msgpack_pack_append_buffer(x, &d, 1); + } else if(n < 65536) { + unsigned char buf[3]; + buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n); + msgpack_pack_append_buffer(x, buf, 5); + } } @@ -709,20 +641,20 @@ msgpack_pack_inline_func(_array)(msgpack_pack_user x, unsigned int n) * Map */ -msgpack_pack_inline_func(_map)(msgpack_pack_user x, unsigned int n) +static inline int msgpack_pack_map(msgpack_packer* x, unsigned int n) { - if(n < 16) { - unsigned char d = 0x80 | n; - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); - } else if(n < 65536) { - unsigned char buf[3]; - buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n); - msgpack_pack_append_buffer(x, buf, 5); - } + if(n < 16) { + unsigned char d = 0x80 | n; + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); + } else if(n < 65536) { + unsigned char buf[3]; + buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n); + msgpack_pack_append_buffer(x, buf, 5); + } } @@ -730,29 +662,112 @@ msgpack_pack_inline_func(_map)(msgpack_pack_user x, unsigned int n) * Raw */ -msgpack_pack_inline_func(_raw)(msgpack_pack_user x, size_t l) +static inline int msgpack_pack_raw(msgpack_packer* x, size_t l) +{ + if (l < 32) { + unsigned char d = 0xa0 | (uint8_t)l; + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); + } else if (x->use_bin_type && l < 256) { // str8 is new format introduced with bin. + unsigned char buf[2] = {0xd9, (uint8_t)l}; + msgpack_pack_append_buffer(x, buf, 2); + } else if (l < 65536) { + unsigned char buf[3]; + buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l); + msgpack_pack_append_buffer(x, buf, 5); + } +} + +/* + * bin + */ +static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) { - if(l < 32) { - unsigned char d = 0xa0 | (uint8_t)l; - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); - } else if(l < 65536) { - unsigned char buf[3]; - buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l); - msgpack_pack_append_buffer(x, buf, 5); - } -} - -msgpack_pack_inline_func(_raw_body)(msgpack_pack_user x, const void* b, size_t l) + if (!x->use_bin_type) { + return msgpack_pack_raw(x, l); + } + if (l < 256) { + unsigned char buf[2] = {0xc4, (unsigned char)l}; + msgpack_pack_append_buffer(x, buf, 2); + } else if (l < 65536) { + unsigned char buf[3] = {0xc5}; + _msgpack_store16(&buf[1], (uint16_t)l); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5] = {0xc6}; + _msgpack_store32(&buf[1], (uint32_t)l); + msgpack_pack_append_buffer(x, buf, 5); + } +} + +static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l) { - msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + return 0; } -#undef msgpack_pack_inline_func -#undef msgpack_pack_user +/* + * Ext + */ +static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) +{ + if (l == 1) { + unsigned char buf[2]; + buf[0] = 0xd4; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 2) { + unsigned char buf[2]; + buf[0] = 0xd5; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 4) { + unsigned char buf[2]; + buf[0] = 0xd6; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 8) { + unsigned char buf[2]; + buf[0] = 0xd7; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 16) { + unsigned char buf[2]; + buf[0] = 0xd8; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l < 256) { + unsigned char buf[3]; + buf[0] = 0xc7; + buf[1] = l; + buf[2] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 3); + } else if(l < 65536) { + unsigned char buf[4]; + buf[0] = 0xc8; + _msgpack_store16(&buf[1], (uint16_t)l); + buf[3] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 4); + } else { + unsigned char buf[6]; + buf[0] = 0xc9; + _msgpack_store32(&buf[1], (uint32_t)l); + buf[5] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 6); + } + +} + + + #undef msgpack_pack_append_buffer #undef TAKE8_8 @@ -768,4 +783,3 @@ msgpack_pack_inline_func(_raw_body)(msgpack_pack_user x, const void* b, size_t l #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 - diff --git a/pandas/src/msgpack/sysdep.h b/pandas/src/msgpack/sysdep.h index 4fedbd8ba472f..ed9c1bc0b8031 100644 --- a/pandas/src/msgpack/sysdep.h +++ b/pandas/src/msgpack/sysdep.h @@ -192,4 +192,3 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif /* msgpack/sysdep.h */ - diff --git a/pandas/src/msgpack/unpack.h b/pandas/src/msgpack/unpack.h index 3dc88e5fbded0..5deb7cde0b929 100644 --- a/pandas/src/msgpack/unpack.h +++ b/pandas/src/msgpack/unpack.h @@ -24,35 +24,23 @@ typedef struct unpack_user { PyObject *object_hook; bool has_pairs_hook; PyObject *list_hook; + PyObject *ext_hook; const char *encoding; const char *unicode_errors; + Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; +typedef PyObject* msgpack_unpack_object; +struct unpack_context; +typedef struct unpack_context unpack_context; +typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); -#define msgpack_unpack_struct(name) \ - struct template ## name - -#define msgpack_unpack_func(ret, name) \ - static inline ret template ## name - -#define msgpack_unpack_callback(name) \ - template_callback ## name - -#define msgpack_unpack_object PyObject* - -#define msgpack_unpack_user unpack_user - -typedef int (*execute_fn)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off); - -struct template_context; -typedef struct template_context template_context; - -static inline msgpack_unpack_object template_callback_root(unpack_user* u) +static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) { return NULL; } -static inline int template_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { PyObject *p = PyInt_FromLong((long)d); if (!p) @@ -60,36 +48,36 @@ static inline int template_callback_uint16(unpack_user* u, uint16_t d, msgpack_u *o = p; return 0; } -static inline int template_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) { - return template_callback_uint16(u, d, o); + return unpack_callback_uint16(u, d, o); } -static inline int template_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p; - if (d > LONG_MAX) { - p = PyLong_FromUnsignedLong((unsigned long)d); - } else { - p = PyInt_FromLong((long)d); - } + PyObject *p = PyInt_FromSize_t((size_t)d); if (!p) return -1; *o = p; return 0; } -static inline int template_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromUnsignedLongLong(d); + PyObject *p; + if (d > LONG_MAX) { + p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); + } else { + p = PyInt_FromSize_t((size_t)d); + } if (!p) return -1; *o = p; return 0; } -static inline int template_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { PyObject *p = PyInt_FromLong(d); if (!p) @@ -98,26 +86,29 @@ static inline int template_callback_int32(unpack_user* u, int32_t d, msgpack_unp return 0; } -static inline int template_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) { - return template_callback_int32(u, d, o); + return unpack_callback_int32(u, d, o); } -static inline int template_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) { - return template_callback_int32(u, d, o); + return unpack_callback_int32(u, d, o); } -static inline int template_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLongLong(d); - if (!p) - return -1; + PyObject *p; + if (d > LONG_MAX || d < LONG_MIN) { + p = PyLong_FromLongLong((unsigned PY_LONG_LONG)d); + } else { + p = PyInt_FromLong((long)d); + } *o = p; return 0; } -static inline int template_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) +static inline int unpack_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) { PyObject *p = PyFloat_FromDouble(d); if (!p) @@ -126,22 +117,26 @@ static inline int template_callback_double(unpack_user* u, double d, msgpack_unp return 0; } -static inline int template_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) +static inline int unpack_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) { - return template_callback_double(u, d, o); + return unpack_callback_double(u, d, o); } -static inline int template_callback_nil(unpack_user* u, msgpack_unpack_object* o) +static inline int unpack_callback_nil(unpack_user* u, msgpack_unpack_object* o) { Py_INCREF(Py_None); *o = Py_None; return 0; } -static inline int template_callback_true(unpack_user* u, msgpack_unpack_object* o) +static inline int unpack_callback_true(unpack_user* u, msgpack_unpack_object* o) { Py_INCREF(Py_True); *o = Py_True; return 0; } -static inline int template_callback_false(unpack_user* u, msgpack_unpack_object* o) +static inline int unpack_callback_false(unpack_user* u, msgpack_unpack_object* o) { Py_INCREF(Py_False); *o = Py_False; return 0; } -static inline int template_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) +static inline int unpack_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) { + if (n > u->max_array_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_array_len(%zd)", n, u->max_array_len); + return -1; + } PyObject *p = u->use_list ? PyList_New(n) : PyTuple_New(n); if (!p) @@ -150,7 +145,7 @@ static inline int template_callback_array(unpack_user* u, unsigned int n, msgpac return 0; } -static inline int template_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) +static inline int unpack_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) { if (u->use_list) PyList_SET_ITEM(*c, current, o); @@ -159,10 +154,10 @@ static inline int template_callback_array_item(unpack_user* u, unsigned int curr return 0; } -static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c) +static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) { if (u->list_hook) { - PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL); if (!new_c) return -1; Py_DECREF(*c); @@ -171,8 +166,12 @@ static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_obj return 0; } -static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) +static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) { + if (n > u->max_map_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_map_len(%zd)", n, u->max_map_len); + return -1; + } PyObject *p; if (u->has_pairs_hook) { p = PyList_New(n); // Or use tuple? @@ -186,7 +185,7 @@ static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_ return 0; } -static inline int template_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) +static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { if (u->has_pairs_hook) { msgpack_unpack_object item = PyTuple_Pack(2, k, v); @@ -205,10 +204,10 @@ static inline int template_callback_map_item(unpack_user* u, unsigned int curren return -1; } -static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c) +static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) { if (u->object_hook) { - PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL); if (!new_c) return -1; @@ -218,8 +217,13 @@ static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_objec return 0; } -static inline int template_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) +static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) { + if (l > u->max_str_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_str_len(%zd)", l, u->max_str_len); + return -1; + } + PyObject *py; if(u->encoding) { py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); @@ -232,4 +236,43 @@ static inline int template_callback_raw(unpack_user* u, const char* b, const cha return 0; } +static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) +{ + if (l > u->max_bin_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_bin_len(%zd)", l, u->max_bin_len); + return -1; + } + + PyObject *py = PyBytes_FromStringAndSize(p, l); + if (!py) + return -1; + *o = py; + return 0; +} + +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length, msgpack_unpack_object* o) +{ + PyObject *py; + int8_t typecode = (int8_t)*pos++; + if (!u->ext_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); + return -1; + } + if (length-1 > u->max_ext_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); + return -1; + } + // length also includes the typecode, so the actual data is length-1 +#if PY_MAJOR_VERSION == 2 + py = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, pos, length-1); +#else + py = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, pos, length-1); +#endif + if (!py) + return -1; + *o = py; + return 0; +} + #include "unpack_template.h" diff --git a/pandas/src/msgpack/unpack_define.h b/pandas/src/msgpack/unpack_define.h index 959d3519e7b5c..0dd708d17c3d4 100644 --- a/pandas/src/msgpack/unpack_define.h +++ b/pandas/src/msgpack/unpack_define.h @@ -34,54 +34,57 @@ extern "C" { #endif +// CS is first byte & 0x1f typedef enum { - CS_HEADER = 0x00, // nil - - //CS_ = 0x01, - //CS_ = 0x02, // false - //CS_ = 0x03, // true - - //CS_ = 0x04, - //CS_ = 0x05, - //CS_ = 0x06, - //CS_ = 0x07, - - //CS_ = 0x08, - //CS_ = 0x09, - CS_FLOAT = 0x0a, - CS_DOUBLE = 0x0b, - CS_UINT_8 = 0x0c, - CS_UINT_16 = 0x0d, - CS_UINT_32 = 0x0e, - CS_UINT_64 = 0x0f, - CS_INT_8 = 0x10, - CS_INT_16 = 0x11, - CS_INT_32 = 0x12, - CS_INT_64 = 0x13, - - //CS_ = 0x14, - //CS_ = 0x15, - //CS_BIG_INT_16 = 0x16, - //CS_BIG_INT_32 = 0x17, - //CS_BIG_FLOAT_16 = 0x18, - //CS_BIG_FLOAT_32 = 0x19, - CS_RAW_16 = 0x1a, - CS_RAW_32 = 0x1b, - CS_ARRAY_16 = 0x1c, - CS_ARRAY_32 = 0x1d, - CS_MAP_16 = 0x1e, - CS_MAP_32 = 0x1f, - - //ACS_BIG_INT_VALUE, - //ACS_BIG_FLOAT_VALUE, - ACS_RAW_VALUE, + CS_HEADER = 0x00, // nil + + //CS_ = 0x01, + //CS_ = 0x02, // false + //CS_ = 0x03, // true + + CS_BIN_8 = 0x04, + CS_BIN_16 = 0x05, + CS_BIN_32 = 0x06, + + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, + + CS_FLOAT = 0x0a, + CS_DOUBLE = 0x0b, + CS_UINT_8 = 0x0c, + CS_UINT_16 = 0x0d, + CS_UINT_32 = 0x0e, + CS_UINT_64 = 0x0f, + CS_INT_8 = 0x10, + CS_INT_16 = 0x11, + CS_INT_32 = 0x12, + CS_INT_64 = 0x13, + + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, + + CS_RAW_8 = 0x19, + CS_RAW_16 = 0x1a, + CS_RAW_32 = 0x1b, + CS_ARRAY_16 = 0x1c, + CS_ARRAY_32 = 0x1d, + CS_MAP_16 = 0x1e, + CS_MAP_32 = 0x1f, + + ACS_RAW_VALUE, + ACS_BIN_VALUE, + ACS_EXT_VALUE, } msgpack_unpack_state; typedef enum { - CT_ARRAY_ITEM, - CT_MAP_KEY, - CT_MAP_VALUE, + CT_ARRAY_ITEM, + CT_MAP_KEY, + CT_MAP_VALUE, } msgpack_container_type; @@ -90,4 +93,3 @@ typedef enum { #endif #endif /* msgpack/unpack_define.h */ - diff --git a/pandas/src/msgpack/unpack_template.h b/pandas/src/msgpack/unpack_template.h index 83b6918dc6686..d34eceda6ab69 100644 --- a/pandas/src/msgpack/unpack_template.h +++ b/pandas/src/msgpack/unpack_template.h @@ -16,167 +16,142 @@ * limitations under the License. */ -#ifndef msgpack_unpack_func -#error msgpack_unpack_func template is not defined -#endif - -#ifndef msgpack_unpack_callback -#error msgpack_unpack_callback template is not defined -#endif - -#ifndef msgpack_unpack_struct -#error msgpack_unpack_struct template is not defined -#endif - -#ifndef msgpack_unpack_struct_decl -#define msgpack_unpack_struct_decl(name) msgpack_unpack_struct(name) -#endif - -#ifndef msgpack_unpack_object -#error msgpack_unpack_object type is not defined -#endif - -#ifndef msgpack_unpack_user -#error msgpack_unpack_user type is not defined -#endif - #ifndef USE_CASE_RANGE #if !defined(_MSC_VER) #define USE_CASE_RANGE #endif #endif -msgpack_unpack_struct_decl(_stack) { - msgpack_unpack_object obj; - size_t size; - size_t count; - unsigned int ct; - msgpack_unpack_object map_key; -}; - -msgpack_unpack_struct_decl(_context) { - msgpack_unpack_user user; - unsigned int cs; - unsigned int trail; - unsigned int top; - /* - msgpack_unpack_struct(_stack)* stack; - unsigned int stack_size; - msgpack_unpack_struct(_stack) embed_stack[MSGPACK_EMBED_STACK_SIZE]; - */ - msgpack_unpack_struct(_stack) stack[MSGPACK_EMBED_STACK_SIZE]; +typedef struct unpack_stack { + PyObject* obj; + size_t size; + size_t count; + unsigned int ct; + PyObject* map_key; +} unpack_stack; + +struct unpack_context { + unpack_user user; + unsigned int cs; + unsigned int trail; + unsigned int top; + /* + unpack_stack* stack; + unsigned int stack_size; + unpack_stack embed_stack[MSGPACK_EMBED_STACK_SIZE]; + */ + unpack_stack stack[MSGPACK_EMBED_STACK_SIZE]; }; -msgpack_unpack_func(void, _init)(msgpack_unpack_struct(_context)* ctx) +static inline void unpack_init(unpack_context* ctx) { - ctx->cs = CS_HEADER; - ctx->trail = 0; - ctx->top = 0; - /* - ctx->stack = ctx->embed_stack; - ctx->stack_size = MSGPACK_EMBED_STACK_SIZE; - */ - ctx->stack[0].obj = msgpack_unpack_callback(_root)(&ctx->user); + ctx->cs = CS_HEADER; + ctx->trail = 0; + ctx->top = 0; + /* + ctx->stack = ctx->embed_stack; + ctx->stack_size = MSGPACK_EMBED_STACK_SIZE; + */ + ctx->stack[0].obj = unpack_callback_root(&ctx->user); } /* -msgpack_unpack_func(void, _destroy)(msgpack_unpack_struct(_context)* ctx) +static inline void unpack_destroy(unpack_context* ctx) { - if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) { - free(ctx->stack); - } + if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) { + free(ctx->stack); + } } */ -msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context)* ctx) +static inline PyObject* unpack_data(unpack_context* ctx) { - return (ctx)->stack[0].obj; + return (ctx)->stack[0].obj; } template -msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) +static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off) { - assert(len >= *off); + assert(len >= *off); - const unsigned char* p = (unsigned char*)data + *off; - const unsigned char* const pe = (unsigned char*)data + len; - const void* n = NULL; + const unsigned char* p = (unsigned char*)data + *off; + const unsigned char* const pe = (unsigned char*)data + len; + const void* n = NULL; - unsigned int trail = ctx->trail; - unsigned int cs = ctx->cs; - unsigned int top = ctx->top; - msgpack_unpack_struct(_stack)* stack = ctx->stack; - /* - unsigned int stack_size = ctx->stack_size; - */ - msgpack_unpack_user* user = &ctx->user; + unsigned int trail = ctx->trail; + unsigned int cs = ctx->cs; + unsigned int top = ctx->top; + unpack_stack* stack = ctx->stack; + /* + unsigned int stack_size = ctx->stack_size; + */ + unpack_user* user = &ctx->user; - msgpack_unpack_object obj; - msgpack_unpack_struct(_stack)* c = NULL; + PyObject* obj; + unpack_stack* c = NULL; - int ret; + int ret; #define construct_cb(name) \ - construct && msgpack_unpack_callback(name) + construct && unpack_callback ## name #define push_simple_value(func) \ - if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \ - goto _push + if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \ + goto _push #define push_fixed_value(func, arg) \ - if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ - goto _push + if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ + goto _push #define push_variable_value(func, base, pos, len) \ - if(construct_cb(func)(user, \ - (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ - goto _push + if(construct_cb(func)(user, \ + (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ + goto _push #define again_fixed_trail(_cs, trail_len) \ - trail = trail_len; \ - cs = _cs; \ - goto _fixed_trail_again + trail = trail_len; \ + cs = _cs; \ + goto _fixed_trail_again #define again_fixed_trail_if_zero(_cs, trail_len, ifzero) \ - trail = trail_len; \ - if(trail == 0) { goto ifzero; } \ - cs = _cs; \ - goto _fixed_trail_again + trail = trail_len; \ + if(trail == 0) { goto ifzero; } \ + cs = _cs; \ + goto _fixed_trail_again #define start_container(func, count_, ct_) \ - if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ - if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ - if((count_) == 0) { obj = stack[top].obj; \ - if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ - goto _push; } \ - stack[top].ct = ct_; \ - stack[top].size = count_; \ - stack[top].count = 0; \ - ++top; \ - /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ - /*printf("stack push %d\n", top);*/ \ - /* FIXME \ - if(top >= stack_size) { \ - if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ - size_t csize = sizeof(msgpack_unpack_struct(_stack)) * MSGPACK_EMBED_STACK_SIZE; \ - size_t nsize = csize * 2; \ - msgpack_unpack_struct(_stack)* tmp = (msgpack_unpack_struct(_stack)*)malloc(nsize); \ - if(tmp == NULL) { goto _failed; } \ - memcpy(tmp, ctx->stack, csize); \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ - } else { \ - size_t nsize = sizeof(msgpack_unpack_struct(_stack)) * ctx->stack_size * 2; \ - msgpack_unpack_struct(_stack)* tmp = (msgpack_unpack_struct(_stack)*)realloc(ctx->stack, nsize); \ - if(tmp == NULL) { goto _failed; } \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = stack_size * 2; \ - } \ - } \ - */ \ - goto _header_again - -#define NEXT_CS(p) \ - ((unsigned int)*p & 0x1f) + if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ + if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ + if((count_) == 0) { obj = stack[top].obj; \ + if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ + goto _push; } \ + stack[top].ct = ct_; \ + stack[top].size = count_; \ + stack[top].count = 0; \ + ++top; \ + /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ + /*printf("stack push %d\n", top);*/ \ + /* FIXME \ + if(top >= stack_size) { \ + if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ + size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \ + size_t nsize = csize * 2; \ + unpack_stack* tmp = (unpack_stack*)malloc(nsize); \ + if(tmp == NULL) { goto _failed; } \ + memcpy(tmp, ctx->stack, csize); \ + ctx->stack = stack = tmp; \ + ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ + } else { \ + size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \ + unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \ + if(tmp == NULL) { goto _failed; } \ + ctx->stack = stack = tmp; \ + ctx->stack_size = stack_size = stack_size * 2; \ + } \ + } \ + */ \ + goto _header_again + +#define NEXT_CS(p) ((unsigned int)*p & 0x1f) #ifdef USE_CASE_RANGE #define SWITCH_RANGE_BEGIN switch(*p) { @@ -190,221 +165,235 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c #define SWITCH_RANGE_END } } #endif - if(p == pe) { goto _out; } - do { - switch(cs) { - case CS_HEADER: - SWITCH_RANGE_BEGIN - SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum - push_fixed_value(_uint8, *(uint8_t*)p); - SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum - push_fixed_value(_int8, *(int8_t*)p); - SWITCH_RANGE(0xc0, 0xdf) // Variable - switch(*p) { - case 0xc0: // nil - push_simple_value(_nil); - //case 0xc1: // string - // again_terminal_trail(NEXT_CS(p), p+1); - case 0xc2: // false - push_simple_value(_false); - case 0xc3: // true - push_simple_value(_true); - //case 0xc4: - //case 0xc5: - //case 0xc6: - //case 0xc7: - //case 0xc8: - //case 0xc9: - case 0xca: // float - case 0xcb: // double - case 0xcc: // unsigned int 8 - case 0xcd: // unsigned int 16 - case 0xce: // unsigned int 32 - case 0xcf: // unsigned int 64 - case 0xd0: // signed int 8 - case 0xd1: // signed int 16 - case 0xd2: // signed int 32 - case 0xd3: // signed int 64 - again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - //case 0xd4: - //case 0xd5: - //case 0xd6: // big integer 16 - //case 0xd7: // big integer 32 - //case 0xd8: // big float 16 - //case 0xd9: // big float 32 - case 0xda: // raw 16 - case 0xdb: // raw 32 - case 0xdc: // array 16 - case 0xdd: // array 32 - case 0xde: // map 16 - case 0xdf: // map 32 - again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); - default: - goto _failed; - } - SWITCH_RANGE(0xa0, 0xbf) // FixRaw - again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); - SWITCH_RANGE(0x90, 0x9f) // FixArray - start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); - SWITCH_RANGE(0x80, 0x8f) // FixMap - start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); - - SWITCH_RANGE_DEFAULT - goto _failed; - SWITCH_RANGE_END - // end CS_HEADER - - - _fixed_trail_again: - ++p; - - default: - if((size_t)(pe - p) < trail) { goto _out; } - n = p; p += trail - 1; - switch(cs) { - //case CS_ - //case CS_ - case CS_FLOAT: { - union { uint32_t i; float f; } mem; - mem.i = _msgpack_load32(uint32_t,n); - push_fixed_value(_float, mem.f); } - case CS_DOUBLE: { - union { uint64_t i; double f; } mem; - mem.i = _msgpack_load64(uint64_t,n); + if(p == pe) { goto _out; } + do { + switch(cs) { + case CS_HEADER: + SWITCH_RANGE_BEGIN + SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum + push_fixed_value(_uint8, *(uint8_t*)p); + SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum + push_fixed_value(_int8, *(int8_t*)p); + SWITCH_RANGE(0xc0, 0xdf) // Variable + switch(*p) { + case 0xc0: // nil + push_simple_value(_nil); + //case 0xc1: // never used + case 0xc2: // false + push_simple_value(_false); + case 0xc3: // true + push_simple_value(_true); + case 0xc4: // bin 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc5: // bin 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc6: // bin 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xca: // float + case 0xcb: // double + case 0xcc: // unsigned int 8 + case 0xcd: // unsigned int 16 + case 0xce: // unsigned int 32 + case 0xcf: // unsigned int 64 + case 0xd0: // signed int 8 + case 0xd1: // signed int 16 + case 0xd2: // signed int 32 + case 0xd3: // signed int 64 + again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + case 0xd9: // str 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xda: // raw 16 + case 0xdb: // raw 32 + case 0xdc: // array 16 + case 0xdd: // array 32 + case 0xde: // map 16 + case 0xdf: // map 32 + again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); + default: + goto _failed; + } + SWITCH_RANGE(0xa0, 0xbf) // FixRaw + again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); + SWITCH_RANGE(0x90, 0x9f) // FixArray + start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); + SWITCH_RANGE(0x80, 0x8f) // FixMap + start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); + + SWITCH_RANGE_DEFAULT + goto _failed; + SWITCH_RANGE_END + // end CS_HEADER + + + _fixed_trail_again: + ++p; + + default: + if((size_t)(pe - p) < trail) { goto _out; } + n = p; p += trail - 1; + switch(cs) { + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); + case CS_FLOAT: { + union { uint32_t i; float f; } mem; + mem.i = _msgpack_load32(uint32_t,n); + push_fixed_value(_float, mem.f); } + case CS_DOUBLE: { + union { uint64_t i; double f; } mem; + mem.i = _msgpack_load64(uint64_t,n); #if defined(__arm__) && !(__ARM_EABI__) // arm-oabi // https://github.com/msgpack/msgpack-perl/pull/1 mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); #endif - push_fixed_value(_double, mem.f); } - case CS_UINT_8: - push_fixed_value(_uint8, *(uint8_t*)n); - case CS_UINT_16: - push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); - case CS_UINT_32: - push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); - case CS_UINT_64: - push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); - - case CS_INT_8: - push_fixed_value(_int8, *(int8_t*)n); - case CS_INT_16: - push_fixed_value(_int16, _msgpack_load16(int16_t,n)); - case CS_INT_32: - push_fixed_value(_int32, _msgpack_load32(int32_t,n)); - case CS_INT_64: - push_fixed_value(_int64, _msgpack_load64(int64_t,n)); - - //case CS_ - //case CS_ - //case CS_BIG_INT_16: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero); - //case CS_BIG_INT_32: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero); - //case ACS_BIG_INT_VALUE: - //_big_int_zero: - // // FIXME - // push_variable_value(_big_int, data, n, trail); - - //case CS_BIG_FLOAT_16: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero); - //case CS_BIG_FLOAT_32: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero); - //case ACS_BIG_FLOAT_VALUE: - //_big_float_zero: - // // FIXME - // push_variable_value(_big_float, data, n, trail); - - case CS_RAW_16: - again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); - case CS_RAW_32: - again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); - case ACS_RAW_VALUE: - _raw_zero: - push_variable_value(_raw, data, n, trail); - - case CS_ARRAY_16: - start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); - case CS_ARRAY_32: - /* FIXME security guard */ - start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); - - case CS_MAP_16: - start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); - case CS_MAP_32: - /* FIXME security guard */ - start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); - - default: - goto _failed; - } - } + push_fixed_value(_double, mem.f); } + case CS_UINT_8: + push_fixed_value(_uint8, *(uint8_t*)n); + case CS_UINT_16: + push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); + case CS_UINT_32: + push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); + case CS_UINT_64: + push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); + + case CS_INT_8: + push_fixed_value(_int8, *(int8_t*)n); + case CS_INT_16: + push_fixed_value(_int16, _msgpack_load16(int16_t,n)); + case CS_INT_32: + push_fixed_value(_int32, _msgpack_load32(int32_t,n)); + case CS_INT_64: + push_fixed_value(_int64, _msgpack_load64(int64_t,n)); + + case CS_BIN_8: + again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); + case CS_BIN_16: + again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero); + case CS_BIN_32: + again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load32(uint32_t,n), _bin_zero); + case ACS_BIN_VALUE: + _bin_zero: + push_variable_value(_bin, data, n, trail); + + case CS_RAW_8: + again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero); + case CS_RAW_16: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); + case CS_RAW_32: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); + case ACS_RAW_VALUE: + _raw_zero: + push_variable_value(_raw, data, n, trail); + + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + + case CS_ARRAY_16: + start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); + case CS_ARRAY_32: + /* FIXME security guard */ + start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); + + case CS_MAP_16: + start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); + case CS_MAP_32: + /* FIXME security guard */ + start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); + + default: + goto _failed; + } + } _push: - if(top == 0) { goto _finish; } - c = &stack[top-1]; - switch(c->ct) { - case CT_ARRAY_ITEM: - if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } - if(++c->count == c->size) { - obj = c->obj; - if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; } - --top; - /*printf("stack pop %d\n", top);*/ - goto _push; - } - goto _header_again; - case CT_MAP_KEY: - c->map_key = obj; - c->ct = CT_MAP_VALUE; - goto _header_again; - case CT_MAP_VALUE: - if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } - if(++c->count == c->size) { - obj = c->obj; - if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } - --top; - /*printf("stack pop %d\n", top);*/ - goto _push; - } - c->ct = CT_MAP_KEY; - goto _header_again; - - default: - goto _failed; - } + if(top == 0) { goto _finish; } + c = &stack[top-1]; + switch(c->ct) { + case CT_ARRAY_ITEM: + if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } + if(++c->count == c->size) { + obj = c->obj; + if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + goto _header_again; + case CT_MAP_KEY: + c->map_key = obj; + c->ct = CT_MAP_VALUE; + goto _header_again; + case CT_MAP_VALUE: + if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } + if(++c->count == c->size) { + obj = c->obj; + if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + c->ct = CT_MAP_KEY; + goto _header_again; + + default: + goto _failed; + } _header_again: - cs = CS_HEADER; - ++p; - } while(p != pe); - goto _out; + cs = CS_HEADER; + ++p; + } while(p != pe); + goto _out; _finish: - if (!construct) - msgpack_unpack_callback(_nil)(user, &obj); - stack[0].obj = obj; - ++p; - ret = 1; - /*printf("-- finish --\n"); */ - goto _end; + if (!construct) + unpack_callback_nil(user, &obj); + stack[0].obj = obj; + ++p; + ret = 1; + /*printf("-- finish --\n"); */ + goto _end; _failed: - /*printf("** FAILED **\n"); */ - ret = -1; - goto _end; + /*printf("** FAILED **\n"); */ + ret = -1; + goto _end; _out: - ret = 0; - goto _end; + ret = 0; + goto _end; _end: - ctx->cs = cs; - ctx->trail = trail; - ctx->top = top; - *off = p - (const unsigned char*)data; + ctx->cs = cs; + ctx->trail = trail; + ctx->top = top; + *off = p - (const unsigned char*)data; - return ret; + return ret; #undef construct_cb } @@ -420,55 +409,55 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c #undef start_container template -msgpack_unpack_func(int, _container_header)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) +static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off) { - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; #define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; #ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... fixed_offset + 0xf: + case fixed_offset + 0x0 ... fixed_offset + 0xf: #else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: #endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; } - msgpack_unpack_callback(_uint32)(&ctx->user, size, &ctx->stack[0].obj); - return 1; + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; } #undef SWITCH_RANGE_BEGIN @@ -476,17 +465,11 @@ msgpack_unpack_func(int, _container_header)(msgpack_unpack_struct(_context)* ctx #undef SWITCH_RANGE_DEFAULT #undef SWITCH_RANGE_END -static const execute_fn template_construct = &template_execute; -static const execute_fn template_skip = &template_execute; -static const execute_fn read_array_header = &template_container_header<0x90, 0xdc>; -static const execute_fn read_map_header = &template_container_header<0x80, 0xde>; - -#undef msgpack_unpack_func -#undef msgpack_unpack_callback -#undef msgpack_unpack_struct -#undef msgpack_unpack_object -#undef msgpack_unpack_user +static const execute_fn unpack_construct = &unpack_execute; +static const execute_fn unpack_skip = &unpack_execute; +static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; +static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; #undef NEXT_CS -/* vim: set ts=4 sw=4 noexpandtab */ +/* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pandas/tests/test_msgpack/test_buffer.py b/pandas/tests/test_msgpack/test_buffer.py index 940b65406103e..43f5e64012885 100644 --- a/pandas/tests/test_msgpack/test_buffer.py +++ b/pandas/tests/test_msgpack/test_buffer.py @@ -7,6 +7,14 @@ def test_unpack_buffer(): from array import array buf = array('b') - buf.fromstring(packb(('foo', 'bar'))) + buf.fromstring(packb((b'foo', b'bar'))) obj = unpackb(buf, use_list=1) assert [b'foo', b'bar'] == obj + + +def test_unpack_bytearray(): + buf = bytearray(packb(('foo', 'bar'))) + obj = unpackb(buf, use_list=1) + assert [b'foo', b'bar'] == obj + expected_type = bytes + assert all(type(s) == expected_type for s in obj) diff --git a/pandas/tests/test_msgpack/test_case.py b/pandas/tests/test_msgpack/test_case.py index e78456b2ddb62..187668b242495 100644 --- a/pandas/tests/test_msgpack/test_case.py +++ b/pandas/tests/test_msgpack/test_case.py @@ -99,3 +99,4 @@ def test_match(): def test_unicode(): assert unpackb(packb('foobar'), use_list=1) == b'foobar' + diff --git a/pandas/tests/test_msgpack/test_extension.py b/pandas/tests/test_msgpack/test_extension.py new file mode 100644 index 0000000000000..3172605c0aae1 --- /dev/null +++ b/pandas/tests/test_msgpack/test_extension.py @@ -0,0 +1,57 @@ +from __future__ import print_function +import array +import pandas.msgpack as msgpack +from pandas.msgpack import ExtType + + +def test_pack_ext_type(): + def p(s): + packer = msgpack.Packer() + packer.pack_ext_type(0x42, s) + return packer.bytes() + assert p(b'A') == b'\xd4\x42A' # fixext 1 + assert p(b'AB') == b'\xd5\x42AB' # fixext 2 + assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 + assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 + assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 + assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 + assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 + assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 + + +def test_unpack_ext_type(): + def check(b, expected): + assert msgpack.unpackb(b) == expected + + check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 + check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 + check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 + check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 + check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 + check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 + check(b'\xc8\x01\x23\x42' + b'A'*0x0123, + ExtType(0x42, b'A'*0x0123)) # ext 16 + check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, + ExtType(0x42, b'A'*0x00012345)) # ext 32 + + +def test_extension_type(): + def default(obj): + print('default called', obj) + if isinstance(obj, array.array): + typecode = 123 # application specific typecode + data = obj.tostring() + return ExtType(typecode, data) + raise TypeError("Unknwon type object %r" % (obj,)) + + def ext_hook(code, data): + print('ext_hook called', code, data) + assert code == 123 + obj = array.array('d') + obj.fromstring(data) + return obj + + obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] + s = msgpack.packb(obj, default=default) + obj2 = msgpack.unpackb(s, ext_hook=ext_hook) + assert obj == obj2 diff --git a/pandas/tests/test_msgpack/test_format.py b/pandas/tests/test_msgpack/test_format.py index a3a3afd046ce2..706c48436d7d3 100644 --- a/pandas/tests/test_msgpack/test_format.py +++ b/pandas/tests/test_msgpack/test_format.py @@ -7,7 +7,7 @@ def check(src, should, use_list=0): assert unpackb(src, use_list=use_list) == should def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", + check(b"\x93\xc0\xc2\xc3", (None, False, True,)) def testFixnum(): diff --git a/pandas/tests/test_msgpack/test_limits.py b/pandas/tests/test_msgpack/test_limits.py new file mode 100644 index 0000000000000..d9aa957182d65 --- /dev/null +++ b/pandas/tests/test_msgpack/test_limits.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals +import pandas.util.testing as tm + +from pandas.msgpack import packb, unpackb, Packer, Unpacker, ExtType + +class TestLimits(tm.TestCase): + def test_integer(self): + x = -(2 ** 63) + assert unpackb(packb(x)) == x + self.assertRaises((OverflowError, ValueError), packb, x-1) + x = 2 ** 64 - 1 + assert unpackb(packb(x)) == x + self.assertRaises((OverflowError, ValueError), packb, x+1) + + + def test_array_header(self): + packer = Packer() + packer.pack_array_header(2**32-1) + self.assertRaises((OverflowError, ValueError), + packer.pack_array_header, 2**32) + + + def test_map_header(self): + packer = Packer() + packer.pack_map_header(2**32-1) + self.assertRaises((OverflowError, ValueError), + packer.pack_array_header, 2**32) + + + def test_max_str_len(self): + d = 'x' * 3 + packed = packb(d) + + unpacker = Unpacker(max_str_len=3, encoding='utf-8') + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_str_len=2, encoding='utf-8') + unpacker.feed(packed) + self.assertRaises(ValueError, unpacker.unpack) + + + def test_max_bin_len(self): + d = b'x' * 3 + packed = packb(d, use_bin_type=True) + + unpacker = Unpacker(max_bin_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_bin_len=2) + unpacker.feed(packed) + self.assertRaises(ValueError, unpacker.unpack) + + + def test_max_array_len(self): + d = [1, 2, 3] + packed = packb(d) + + unpacker = Unpacker(max_array_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_array_len=2) + unpacker.feed(packed) + self.assertRaises(ValueError, unpacker.unpack) + + + def test_max_map_len(self): + d = {1: 2, 3: 4, 5: 6} + packed = packb(d) + + unpacker = Unpacker(max_map_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_map_len=2) + unpacker.feed(packed) + self.assertRaises(ValueError, unpacker.unpack) + + + def test_max_ext_len(self): + d = ExtType(42, b"abc") + packed = packb(d) + + unpacker = Unpacker(max_ext_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_ext_len=2) + unpacker.feed(packed) + self.assertRaises(ValueError, unpacker.unpack) diff --git a/pandas/tests/test_msgpack/test_newspec.py b/pandas/tests/test_msgpack/test_newspec.py new file mode 100644 index 0000000000000..8532ab8cfb1a4 --- /dev/null +++ b/pandas/tests/test_msgpack/test_newspec.py @@ -0,0 +1,88 @@ +# coding: utf-8 + +from pandas.msgpack import packb, unpackb, ExtType + + +def test_str8(): + header = b'\xd9' + data = b'x' * 32 + b = packb(data.decode(), use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b'\x20' + assert b[2:] == data + assert unpackb(b) == data + + data = b'x' * 255 + b = packb(data.decode(), use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b'\xff' + assert b[2:] == data + assert unpackb(b) == data + + +def test_bin8(): + header = b'\xc4' + data = b'' + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b'\x00' + assert b[2:] == data + assert unpackb(b) == data + + data = b'x' * 255 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b'\xff' + assert b[2:] == data + assert unpackb(b) == data + + +def test_bin16(): + header = b'\xc5' + data = b'x' * 256 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 3 + assert b[0:1] == header + assert b[1:3] == b'\x01\x00' + assert b[3:] == data + assert unpackb(b) == data + + data = b'x' * 65535 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 3 + assert b[0:1] == header + assert b[1:3] == b'\xff\xff' + assert b[3:] == data + assert unpackb(b) == data + + +def test_bin32(): + header = b'\xc6' + data = b'x' * 65536 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 5 + assert b[0:1] == header + assert b[1:5] == b'\x00\x01\x00\x00' + assert b[5:] == data + assert unpackb(b) == data + +def test_ext(): + def check(ext, packed): + assert packb(ext) == packed + assert unpackb(packed) == ext + check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 + check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 + check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4 + check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 + check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 + # ext 8 + check(ExtType(0x42, b''), b'\xc7\x00\x42') + check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) + # ext 16 + check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) + check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) + # ext 32 + check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) + # needs large memory + #check(ExtType(0x42, b'Z'*0xffffffff), + # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/pandas/tests/test_msgpack/test_obj.py b/pandas/tests/test_msgpack/test_obj.py index 4a018bc8b87f1..886fec522d4f3 100644 --- a/pandas/tests/test_msgpack/test_obj.py +++ b/pandas/tests/test_msgpack/test_obj.py @@ -44,13 +44,13 @@ def test_decode_pairs_hook(self): assert unpacked[1] == prod_sum def test_only_one_obj_hook(self): - self.assertRaises(ValueError, unpackb, b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) + self.assertRaises(TypeError, unpackb, b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) def test_bad_hook(self): def f(): packed = packb([3, 1+2j], default=lambda o: o) unpacked = unpackb(packed, use_list=1) - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) def test_array_hook(self): packed = packb([1,2,3]) diff --git a/pandas/tests/test_msgpack/test_read_size.py b/pandas/tests/test_msgpack/test_read_size.py index db3e1deb04f8f..7cbb9c9807201 100644 --- a/pandas/tests/test_msgpack/test_read_size.py +++ b/pandas/tests/test_msgpack/test_read_size.py @@ -63,3 +63,4 @@ def test_incorrect_type_nested_map(): assert 0, 'should raise exception' except UnexpectedTypeException: assert 1, 'okay' + diff --git a/pandas/tests/test_msgpack/test_seq.py b/pandas/tests/test_msgpack/test_seq.py index e5ee68c4cab84..464ff6d0174af 100644 --- a/pandas/tests/test_msgpack/test_seq.py +++ b/pandas/tests/test_msgpack/test_seq.py @@ -1,21 +1,18 @@ #!/usr/bin/env python # coding: utf-8 -from pandas import compat -from pandas.compat import u +import io import pandas.msgpack as msgpack -binarydata = [chr(i) for i in range(256)] -binarydata = "".join(binarydata) -if compat.PY3: - binarydata = binarydata.encode('utf-8') + +binarydata = bytes(bytearray(range(256))) def gen_binary_data(idx): - data = binarydata[:idx % 300] - return data + return binarydata[:idx % 300] + def test_exceeding_unpacker_read_size(): - dumpf = compat.BytesIO() + dumpf = io.BytesIO() packer = msgpack.Packer() @@ -30,7 +27,7 @@ def test_exceeding_unpacker_read_size(): data = gen_binary_data(idx) dumpf.write(packer.pack(data)) - f = compat.BytesIO(dumpf.getvalue()) + f = io.BytesIO(dumpf.getvalue()) dumpf.close() unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1) diff --git a/pandas/tests/test_msgpack/test_sequnpack.py b/pandas/tests/test_msgpack/test_sequnpack.py index 4c3ad363e5b6e..72ceed0471437 100644 --- a/pandas/tests/test_msgpack/test_sequnpack.py +++ b/pandas/tests/test_msgpack/test_sequnpack.py @@ -82,3 +82,15 @@ def test_readbytes(self): assert unpacker.read_bytes(3) == b'oob' assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') + + def test_issue124(self): + unpacker = Unpacker() + unpacker.feed(b'\xa1?\xa1!') + assert tuple(unpacker) == (b'?', b'!') + assert tuple(unpacker) == () + unpacker.feed(b"\xa1?\xa1") + assert tuple(unpacker) == (b'?',) + assert tuple(unpacker) == () + unpacker.feed(b"!") + assert tuple(unpacker) == (b'!',) + assert tuple(unpacker) == () diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/test_msgpack/test_unpack.py new file mode 100644 index 0000000000000..fe840083ae1c2 --- /dev/null +++ b/pandas/tests/test_msgpack/test_unpack.py @@ -0,0 +1,65 @@ +from io import BytesIO +import sys +from pandas.msgpack import Unpacker, packb, OutOfData, ExtType +import pandas.util.testing as tm +import nose + +class TestUnpack(tm.TestCase): + def test_unpack_array_header_from_file(self): + f = BytesIO(packb([1,2,3,4])) + unpacker = Unpacker(f) + assert unpacker.read_array_header() == 4 + assert unpacker.unpack() == 1 + assert unpacker.unpack() == 2 + assert unpacker.unpack() == 3 + assert unpacker.unpack() == 4 + self.assertRaises(OutOfData, unpacker.unpack) + + + def test_unpacker_hook_refcnt(self): + if not hasattr(sys, 'getrefcount'): + raise nose.SkipTest('no sys.getrefcount()') + result = [] + + def hook(x): + result.append(x) + return x + + basecnt = sys.getrefcount(hook) + + up = Unpacker(object_hook=hook, list_hook=hook) + + assert sys.getrefcount(hook) >= basecnt + 2 + + up.feed(packb([{}])) + up.feed(packb([{}])) + assert up.unpack() == [{}] + assert up.unpack() == [{}] + assert result == [{}, [{}], {}, [{}]] + + del up + + assert sys.getrefcount(hook) == basecnt + + + def test_unpacker_ext_hook(self): + + class MyUnpacker(Unpacker): + + def __init__(self): + super(MyUnpacker, self).__init__(ext_hook=self._hook, + encoding='utf-8') + + def _hook(self, code, data): + if code == 1: + return int(data) + else: + return ExtType(code, data) + + unpacker = MyUnpacker() + unpacker.feed(packb({'a': 1}, encoding='utf-8')) + assert unpacker.unpack() == {'a': 1} + unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8')) + assert unpacker.unpack() == {'a': 123} + unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8')) + assert unpacker.unpack() == {'a': ExtType(2, b'321')} diff --git a/pandas/tests/test_msgpack/test_unpack_raw.py b/pandas/tests/test_msgpack/test_unpack_raw.py index 0e96a79cf190a..c6bf747c8d992 100644 --- a/pandas/tests/test_msgpack/test_unpack_raw.py +++ b/pandas/tests/test_msgpack/test_unpack_raw.py @@ -1,18 +1,19 @@ """Tests for cases where the user seeks to obtain packed msgpack objects""" -from pandas import compat +import io from pandas.msgpack import Unpacker, packb + def test_write_bytes(): unpacker = Unpacker() unpacker.feed(b'abc') - f = compat.BytesIO() + f = io.BytesIO() assert unpacker.unpack(f.write) == ord('a') assert f.getvalue() == b'a' - f = compat.BytesIO() + f = io.BytesIO() assert unpacker.skip(f.write) is None assert f.getvalue() == b'b' - f = compat.BytesIO() + f = io.BytesIO() assert unpacker.skip() is None assert f.getvalue() == b'' @@ -20,9 +21,9 @@ def test_write_bytes(): def test_write_bytes_multi_buffer(): long_val = (5) * 100 expected = packb(long_val) - unpacker = Unpacker(compat.BytesIO(expected), read_size=3, max_buffer_size=3) + unpacker = Unpacker(io.BytesIO(expected), read_size=3, max_buffer_size=3) - f = compat.BytesIO() + f = io.BytesIO() unpacked = unpacker.unpack(f.write) assert unpacked == long_val assert f.getvalue() == expected diff --git a/setup.py b/setup.py index 594e62bc622b9..0a680fe5d756d 100755 --- a/setup.py +++ b/setup.py @@ -456,16 +456,22 @@ def pxd(name): else: macros = [('__LITTLE_ENDIAN__', '1')] -msgpack_ext = Extension('pandas.msgpack', - sources = [srcpath('msgpack', +packer_ext = Extension('pandas.msgpack._packer', + sources = [srcpath('_packer', suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='')], + subdir='msgpack')], language='c++', - include_dirs=common_include, + include_dirs=['pandas/src/msgpack'] + common_include, define_macros=macros) - -extensions.append(msgpack_ext) - +unpacker_ext = Extension('pandas.msgpack._unpacker', + sources = [srcpath('_unpacker', + suffix=suffix if suffix == '.pyx' else '.cpp', + subdir='msgpack')], + language='c++', + include_dirs=['pandas/src/msgpack'] + common_include, + define_macros=macros) +extensions.append(packer_ext) +extensions.append(unpacker_ext) # if not ISRELEASED: # extensions.extend([sandbox_ext]) @@ -525,6 +531,7 @@ def pxd(name): 'pandas.io.tests', 'pandas.io.tests.test_json', 'pandas.stats.tests', + 'pandas.msgpack' ], package_data={'pandas.io': ['tests/data/legacy_hdf/*.h5', 'tests/data/legacy_pickle/*/*.pickle',