Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve half-floats handling to be RFC-conformant #49

Merged
merged 7 commits into from
Jan 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Next
- Fixed Linux installation directory depending on architecture [#34] (by jvymazal)
- Improved 32-bit support [#35]
- Fixed MSVC compatibility [#31]
- Fixed and improved half-float encoding [#5] [#11]

0.4.0 (2015-12-25)
---------------------
Expand Down
15 changes: 15 additions & 0 deletions doc/source/api/type_7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,18 @@ Manipulating existing items
.. doxygenfunction:: cbor_set_float4
.. doxygenfunction:: cbor_set_float8


.. _api_type_7_hard_floats:

Half floats
~~~~~~~~~~~~
CBOR supports `two-byte wide ("half-precision") <https://en.wikipedia.org/wiki/Half-precision_floating-point_format>`_
floats which are not supported by the C language. *libcbor* represents them using :type:`float`
values throughout the API, which has important implications when manipulating these values.

In particular, if a user uses some of the manipulation APIs
(e.g. :func:`cbor_set_float2`, :func:`cbor_new_float2`)
to introduce a value that doesn't have an exact half-float representation,
the encoding semantics are given by :func:`cbor_encode_half` as follows:

.. doxygenfunction:: cbor_encode_half
4 changes: 3 additions & 1 deletion doc/source/rfc_conformance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@ There is no explicit limitation of indefinite length byte strings. [#]_ *libcbor

"Half-precision" IEEE 754 floats
---------------------------------
As of C99 and even C11, there is no standard implementation for 2 bytes floats. *libcbor* packs them as a :type:`double`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode.
As of C99 and even C11, there is no standard implementation for 2-byte floats. *libcbor* packs them as a :type:`float`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode.

For more information on half-float serialization, please refer to the section on :ref:`api_type_7_hard_floats`.

32 changes: 22 additions & 10 deletions src/cbor/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,33 @@ size_t cbor_encode_half(float value, unsigned char *buffer, size_t buffer_size)
/* Assuming value is normalized */
uint32_t val = ((union _cbor_float_helper) {.as_float = value}).as_uint;
uint16_t res;
uint8_t exp = (val & 0x7F800000) >> 23; /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */
uint8_t exp = (uint8_t) ((val & 0x7F800000) >> 23); /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */
uint32_t mant = val & 0x7FFFFF; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */
if (exp == 0xFF) { /* Infinity or NaNs */
if (value != value)
if (value != value) {
res = (uint16_t) 0x00e700; /* Not IEEE semantics - required by CBOR [s. 3.9] */
else
res = (val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15;
} else {
res = (uint16_t) ((val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15);
}
} else if (exp == 0x00) { /* Zeroes or subnorms */
res = (val & 0x80000000) >> 16 | (uint16_t) (mant >> 13);
res = (uint16_t) ((val & 0x80000000) >> 16 | mant >> 13);
} else { /* Normal numbers */
exp -= 127;
if (((int8_t) exp) > 15 || ((int8_t) exp) < -14)
return 0; /* No way we can represent magnitude in normalized way */
else
res = (val & 0x80000000) >> 16 | ((exp + 15) << 10) | (uint16_t) (mant >> 13);
int8_t logical_exp = (int8_t) (exp - 127);
assert(logical_exp == exp - 127);

// logical_exp is now the unbiased exponent of a normal (non-zero, finite) float
if (logical_exp < -24) {
/* No unambiguous representation exists, this float is not a half float and is too small to
be represented using a half, round off to zero. Consistent with the reference implementation. */
res = 0;
} else if (logical_exp < -14) {
/* Offset the remaining decimal places by shifting the significand, the value is lost.
This is an implementation decision that works around the absence of standard half-float
in the language. */
res = (uint16_t) (val & 0x80000000) >> 16 | (uint16_t) (1 << (24 + logical_exp));
} else {
res = (uint16_t) ((val & 0x80000000) >> 16 | ((((uint8_t) logical_exp) + 15) << 10) | (uint16_t) (mant >> 13));
}
}
return _cbor_encode_uint16(res, buffer, buffer_size, 0xE0);
}
Expand Down
25 changes: 24 additions & 1 deletion src/cbor/encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ extern "C" {
* ============================================================================
*/

/** Primitive encoder */
size_t cbor_encode_uint8(uint8_t, unsigned char *, size_t);

size_t cbor_encode_uint16(uint16_t, unsigned char *, size_t);
Expand Down Expand Up @@ -66,6 +65,30 @@ size_t cbor_encode_null(unsigned char *, size_t);

size_t cbor_encode_undef(unsigned char *, size_t);

/** Encodes a half-precision float
*
* Since there is no native representation or semantics for half floats
* in the language, we use single-precision floats, as every value that
* can be expressed as a half-float can also be expressed as a float.
*
* This however means that not all floats passed to this function can be
* unambiguously encoded. The behavior is as follows:
* - Infinity, NaN are preserved
* - Zero is preserved
* - Denormalized numbers keep their sign bit and 10 most significant bits of the significand
* - All other numbers
* - If the logical value of the exponent is < -24, the output is zero
* - If the logical value of the exponent is between -24 and -15, the output
* is cut off to represent the 'magnitude' of the input, by which we
* mean (-1)^{signbit} x 1.0e{exponent}. The value in the significand is lost.
* - In all other cases, the sign bit, the exponent, and 10 most significant bits
* of the significand are kept
*
* @param value
* @param buffer Target buffer
* @param buffer_size Available space in the buffer
* @return number of bytes written
*/
size_t cbor_encode_half(float, unsigned char *, size_t);

size_t cbor_encode_single(float, unsigned char *, size_t);
Expand Down
30 changes: 28 additions & 2 deletions test/type_7_encoders_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,40 @@ static void test_break(void **state)

static void test_half(void **state)
{
assert_int_equal(3, cbor_encode_half(1.5, buffer, 512));
assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x3E, 0x00}), 3);

assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x80, 0x00}), 3);

assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
assert_int_equal(3, cbor_encode_half(65504.0, buffer, 512));

assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7B, 0xFF}), 3);

assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x04, 0x00}), 3);

assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0xC4, 0x00}), 3);

/* Smallest representable value */
assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);

/* Smaller than the smallest, approximate magnitude representation */
assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);

/* Smaller than the smallest and even the magnitude cannot be represented,
round off to zero */
assert_int_equal(3, cbor_encode_half(1e-25f, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);

assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x02}), 3);

assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512));
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7C, 0x00}), 3);
}
Expand Down