Skip to content

Commit

Permalink
feat: Byte type 'B'
Browse files Browse the repository at this point in the history
  • Loading branch information
nebkat committed Oct 2, 2024
1 parent 2c84f15 commit afa0a23
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 41 deletions.
63 changes: 32 additions & 31 deletions bjdata/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,21 @@
from functools import reduce

from .compat import raise_from, intern_unicode
from .markers import (TYPE_NONE, TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8,
from .markers import (TYPE_NONE, TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_BYTE, TYPE_INT8, TYPE_UINT8,
TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR,
TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16,
TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT)
from numpy import array as ndarray, dtype as npdtype, frombuffer as buffer2numpy, half as halfprec
from array import array as typedarray

__TYPES = frozenset((TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32,
TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16,
TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, ARRAY_START, OBJECT_START))
__TYPES = frozenset((TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_BYTE, TYPE_INT8, TYPE_UINT8, TYPE_INT16,
TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64,
TYPE_FLOAT16, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, ARRAY_START, OBJECT_START))
__TYPES_NO_DATA = frozenset((TYPE_NULL, TYPE_BOOL_FALSE, TYPE_BOOL_TRUE))
__TYPES_INT = frozenset((TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64))
__TYPES_FIXLEN = frozenset((TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64,
TYPE_FLOAT16, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_CHAR))
__TYPES_INT = frozenset((TYPE_BYTE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_UINT16,
TYPE_UINT32, TYPE_UINT64))
__TYPES_FIXLEN = frozenset((TYPE_BYTE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_UINT16, TYPE_UINT32,
TYPE_UINT64, TYPE_FLOAT16, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_CHAR))

__SMALL_INTS_DECODED = [{pack('>b', i): i for i in range(-128, 128)}, {pack('<b', i): i for i in range(-128, 128)}]
__SMALL_UINTS_DECODED = [{pack('>B', i): i for i in range(256)}, {pack('<B', i): i for i in range(256)}]
Expand All @@ -49,7 +50,8 @@
__UNPACK_FLOAT32 = [Struct('>f').unpack, Struct('<f').unpack]
__UNPACK_FLOAT64 = [Struct('>d').unpack, Struct('<d').unpack]

__DTYPE_MAP = { TYPE_INT8: 'b',
__DTYPE_MAP = { TYPE_BYTE: 'B',
TYPE_INT8: 'b',
TYPE_UINT8: 'B',
TYPE_INT16: 'h',
TYPE_UINT16: 'H',
Expand All @@ -62,7 +64,8 @@
TYPE_FLOAT64: 'd',
TYPE_CHAR: 'c'}

__DTYPELEN_MAP={ TYPE_INT8: 1,
__DTYPELEN_MAP={ TYPE_BYTE: 1,
TYPE_INT8: 1,
TYPE_UINT8: 1,
TYPE_INT16: 2,
TYPE_UINT16: 2,
Expand Down Expand Up @@ -227,6 +230,7 @@ def __decode_object_key(fp_read, marker, intern_object_keys, le=1):
__METHOD_MAP = {TYPE_NULL: (lambda _, __, ___: None),
TYPE_BOOL_TRUE: (lambda _, __, ___: True),
TYPE_BOOL_FALSE: (lambda _, __, ___: False),
TYPE_BYTE: __decode_uint8,
TYPE_INT8: __decode_int8,
TYPE_UINT8: __decode_uint8,
TYPE_INT16: __decode_int16,
Expand All @@ -248,7 +252,7 @@ def prodlist(mylist):
result = result * x
return result

def __get_container_params(fp_read, in_mapping, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle):
def __get_container_params(fp_read, in_mapping, object_hook, object_pairs_hook, intern_object_keys, islittle):
marker = fp_read(1)
dims = []
if marker == CONTAINER_TYPE:
Expand All @@ -262,15 +266,15 @@ def __get_container_params(fp_read, in_mapping, no_bytes, object_hook, object_pa
if marker == CONTAINER_COUNT:
marker = fp_read(1)
if marker == ARRAY_START:
dims = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle)
dims = __decode_array(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle)
count = prodlist(dims)
else:
count = __decode_int_non_negative(fp_read, marker, islittle)
counting = True

# special cases (no data (None or bool) / bytes array) will be handled in calling functions
if not (type_ in __TYPES_NO_DATA or
(type_ == TYPE_UINT8 and not in_mapping and not no_bytes)):
(type_ == TYPE_BYTE and not in_mapping)):
# Reading ahead is just to capture type, which will not exist if type is fixed
marker = fp_read(1) if (in_mapping or type_ == TYPE_NONE) else type_

Expand All @@ -283,9 +287,9 @@ def __get_container_params(fp_read, in_mapping, no_bytes, object_hook, object_pa
return marker, counting, count, type_, dims


def __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, # pylint: disable=too-many-branches
def __decode_object(fp_read, object_hook, object_pairs_hook, # pylint: disable=too-many-branches
intern_object_keys, islittle):
marker, counting, count, type_, dims = __get_container_params(fp_read, True, no_bytes,object_hook, object_pairs_hook,intern_object_keys, islittle)
marker, counting, count, type_, dims = __get_container_params(fp_read, True, object_hook, object_pairs_hook,intern_object_keys, islittle)
has_pairs_hook = object_pairs_hook is not None
obj = [] if has_pairs_hook else {}

Expand Down Expand Up @@ -323,9 +327,9 @@ def __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, # pylint
# handle outside above except (on KeyError) so do not have unfriendly "exception within except" backtrace
if not handled:
if marker == ARRAY_START:
value = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle)
value = __decode_array(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle)
elif marker == OBJECT_START:
value = __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle)
value = __decode_object(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle)
else:
raise DecoderException('Invalid marker within object')

Expand All @@ -341,15 +345,15 @@ def __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, # pylint
return object_pairs_hook(obj) if has_pairs_hook else object_hook(obj)


def __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle):
marker, counting, count, type_, dims = __get_container_params(fp_read, False, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle)
def __decode_array(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle):
marker, counting, count, type_, dims = __get_container_params(fp_read, False, object_hook, object_pairs_hook, intern_object_keys, islittle)

# special case - no data (None or bool)
if type_ in __TYPES_NO_DATA:
return [__METHOD_MAP[type_](fp_read, type_, islittle)] * count

# special case - bytes array
if type_ == TYPE_UINT8 and not no_bytes and len(dims)==0:
if type_ == TYPE_BYTE and len(dims)==0:
container = fp_read(count)
if len(container) < count:
raise DecoderException('Container bytes array too short')
Expand Down Expand Up @@ -388,9 +392,9 @@ def __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_obj
# handle outside above except (on KeyError) so do not have unfriendly "exception within except" backtrace
if not handled:
if marker == ARRAY_START:
value = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle)
value = __decode_array(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle)
elif marker == OBJECT_START:
value = __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys, islittle)
value = __decode_object(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle)
else:
raise DecoderException('Invalid marker within array')

Expand All @@ -411,14 +415,11 @@ def __object_hook_noop(obj):
return obj


def load(fp, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_object_keys=False, islittle=True):
def load(fp, object_hook=None, object_pairs_hook=None, intern_object_keys=False, islittle=True):
"""Decodes and returns BJData/UBJSON from the given file-like object
Args:
fp: read([size])-able object
no_bytes (bool): If set, typed UBJSON arrays (uint8) will not be
converted to a bytes instance and instead treated like
any other array (i.e. result in a list).
object_hook (callable): Called with the result of any object literal
decoded (instead of dict).
object_pairs_hook (callable): Called with the result of any object
Expand Down Expand Up @@ -454,13 +455,13 @@ def load(fp, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_ob
| | (2) unicode |
+----------------------------------+---------------+
| uint8, int8, int16, int32, int64 | (3) int |
| | (2) int, long |
| byte | (2) int, long |
+----------------------------------+---------------+
| float32, float64 | float |
+----------------------------------+---------------+
| high_precision | Decimal |
+----------------------------------+---------------+
| array (typed, uint8) | (3) bytes |
| array (typed, byte) | (3) bytes |
| | (2) str |
+----------------------------------+---------------+
| true | True |
Expand Down Expand Up @@ -489,9 +490,9 @@ def load(fp, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_ob
except KeyError:
pass
if marker == ARRAY_START:
newobj.append(__decode_array(fp_read, bool(no_bytes), object_hook, object_pairs_hook, intern_object_keys, islittle))
newobj.append(__decode_array(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle))
if marker == OBJECT_START:
newobj.append(__decode_object(fp_read, bool(no_bytes), object_hook, object_pairs_hook, intern_object_keys, islittle))
newobj.append(__decode_object(fp_read, object_hook, object_pairs_hook, intern_object_keys, islittle))
raise DecoderException('Invalid marker')
except DecoderException as ex:
if len(newobj)>0:
Expand All @@ -505,9 +506,9 @@ def load(fp, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_ob

return newobj;

def loadb(chars, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_object_keys=False, islittle=True):
def loadb(chars, object_hook=None, object_pairs_hook=None, intern_object_keys=False, islittle=True):
"""Decodes and returns BJData/UBJSON from the given bytes or bytearray object. See
load() for available arguments."""
with BytesIO(chars) as fp:
return load(fp, no_bytes=no_bytes, object_hook=object_hook, object_pairs_hook=object_pairs_hook,
return load(fp, object_hook=object_hook, object_pairs_hook=object_pairs_hook,
intern_object_keys=intern_object_keys, islittle=islittle)
12 changes: 6 additions & 6 deletions bjdata/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from math import isinf, isnan

from .compat import Mapping, Sequence, INTEGER_TYPES, UNICODE_TYPE, TEXT_TYPES, BYTES_TYPES
from .markers import (TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32,
TYPE_INT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16, TYPE_FLOAT32,
TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, OBJECT_START,
from .markers import (TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_BYTE, TYPE_INT8, TYPE_UINT8, TYPE_INT16,
TYPE_INT32, TYPE_INT64, TYPE_UINT16, TYPE_UINT32, TYPE_UINT64, TYPE_FLOAT16, TYPE_FLOAT32,
TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, OBJECT_START,
OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT)

# Lookup tables for encoding small integers, pre-initialised larger integer & float packers
Expand Down Expand Up @@ -57,7 +57,7 @@
}

# Prefix applicable to specialised byte array container
__BYTES_ARRAY_PREFIX = ARRAY_START + CONTAINER_TYPE + TYPE_UINT8 + CONTAINER_COUNT
__BYTES_ARRAY_PREFIX = ARRAY_START + CONTAINER_TYPE + TYPE_BYTE + CONTAINER_COUNT


class EncoderException(TypeError):
Expand Down Expand Up @@ -342,8 +342,8 @@ def dump(obj, fp, container_count=False, sort_keys=False, no_float32=True, islit
+------------------------------+-----------------------------------+
| Decimal | high_precision |
+------------------------------+-----------------------------------+
| (3) bytes, bytearray | array (type, uint8) |
| (2) str | array (type, uint8) |
| (3) bytes, bytearray | array (type, byte) |
| (2) str | array (type, byte) |
+------------------------------+-----------------------------------+
| (3) collections.abc.Mapping | object |
| (2) collections.Mapping | |
Expand Down
1 change: 1 addition & 0 deletions bjdata/markers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
TYPE_NOOP = b'N'
TYPE_BOOL_TRUE = b'T'
TYPE_BOOL_FALSE = b'F'
TYPE_BYTE = b'B'
TYPE_INT8 = b'i'
TYPE_UINT8 = b'U'
TYPE_INT16 = b'I'
Expand Down
10 changes: 7 additions & 3 deletions src/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ static long long _decode_int_non_negative(_bjdata_decoder_buffer_t *buffer, char
}

switch (marker) {
case TYPE_BYTE:
case TYPE_UINT8:
BAIL_ON_NULL(int_obj = _decode_uint8(buffer));
break;
Expand Down Expand Up @@ -716,7 +717,7 @@ static _container_params_t _get_container_params(_bjdata_decoder_buffer_t *buffe
case TYPE_NULL: case TYPE_BOOL_TRUE: case TYPE_BOOL_FALSE: case TYPE_CHAR: case TYPE_STRING: case TYPE_INT8:
case TYPE_UINT8: case TYPE_INT16: case TYPE_INT32: case TYPE_INT64: case TYPE_FLOAT32: case TYPE_FLOAT64:
#ifdef USE__BJDATA
case TYPE_UINT16: case TYPE_UINT32: case TYPE_UINT64: case TYPE_FLOAT16:
case TYPE_UINT16: case TYPE_UINT32: case TYPE_UINT64: case TYPE_FLOAT16: case TYPE_BYTE:
#endif
case TYPE_HIGH_PREC: case ARRAY_START: case OBJECT_START:
params.type = marker;
Expand Down Expand Up @@ -803,7 +804,8 @@ static int _is_fixed_len_type(char type) {
return ((TYPE_INT8 == type) || (TYPE_UINT8 == type) || (TYPE_INT16 == type)
|| (TYPE_UINT16 == type) || (TYPE_INT32 == type) || (TYPE_UINT32 == type)
|| (TYPE_INT64 == type) || (TYPE_UINT64 == type) || (TYPE_CHAR == type)
|| (TYPE_FLOAT16 == type) || (TYPE_FLOAT32 == type) || (TYPE_FLOAT64 == type));
|| (TYPE_FLOAT16 == type) || (TYPE_FLOAT32 == type) || (TYPE_FLOAT64 == type))
|| (TYPE_BYTE == type);
}

// Note: Does NOT reserve a new reference
Expand All @@ -821,6 +823,7 @@ static int _get_type_info(char type, int *bytelen) {
case TYPE_INT8:
*bytelen=1;
return PyArray_BYTE;
case TYPE_BYTE:
case TYPE_UINT8:
*bytelen=1;
return PyArray_UBYTE;
Expand Down Expand Up @@ -881,7 +884,7 @@ static PyObject* _decode_array(_bjdata_decoder_buffer_t *buffer) {
marker = params.marker;
if (params.counting) {
// special case - byte array
if ((TYPE_UINT8 == params.type) && !buffer->prefs.no_bytes && ndims==0) {
if ((TYPE_BYTE == params.type) && !buffer->prefs.no_bytes && ndims==0) {
BAIL_ON_NULL(list = PyBytes_FromStringAndSize(NULL, params.count));
READ_INTO_OR_BAIL(params.count, PyBytes_AS_STRING(list), "bytes array");
return list;
Expand Down Expand Up @@ -1211,6 +1214,7 @@ PyObject* _bjdata_decode_value(_bjdata_decoder_buffer_t *buffer, char *given_mar
case TYPE_INT64:
RETURN_OR_RAISE_DECODER_EXCEPTION(_decode_int64(buffer), "int64");
#ifdef USE__BJDATA
case TYPE_BYTE:
case TYPE_UINT8:
RETURN_OR_RAISE_DECODER_EXCEPTION(_decode_uint8(buffer), "uint8");
case TYPE_FLOAT16:
Expand Down
2 changes: 1 addition & 1 deletion src/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

/******************************************************************************/

static char bytes_array_prefix[] = {ARRAY_START, CONTAINER_TYPE, TYPE_UINT8, CONTAINER_COUNT};
static char bytes_array_prefix[] = {ARRAY_START, CONTAINER_TYPE, TYPE_BYTE, CONTAINER_COUNT};

#define POWER_TWO(x) ((long long) 1 << (x))

Expand Down
1 change: 1 addition & 0 deletions src/markers.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ extern "C" {
#define TYPE_NOOP 'N'
#define TYPE_BOOL_TRUE 'T'
#define TYPE_BOOL_FALSE 'F'
#define TYPE_BYTE 'B'
#define TYPE_INT8 'i'
#define TYPE_UINT8 'U'
#define TYPE_INT16 'I'
Expand Down

0 comments on commit afa0a23

Please sign in to comment.