From 075cdbf798145d2ebb6cefab3e684ade50521a22 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 8 Jan 2017 16:56:06 +0100 Subject: [PATCH 1/6] Add more RFC tests --- test/type_7_encoders_test.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c index 357b0f88..28625e24 100644 --- a/test/type_7_encoders_test.c +++ b/test/type_7_encoders_test.c @@ -45,14 +45,20 @@ static void test_break(void **state) static void test_half(void **state) { - assert_int_equal(3, cbor_encode_half(1.5, buffer, 512)); + assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x3E, 0x00}), 3); assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x80, 0x00}), 3); assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3); - assert_int_equal(3, cbor_encode_half(65504.0, buffer, 512)); + assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7B, 0xFF}), 3); + assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x04, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x40, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3); assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7C, 0x00}), 3); } From 703f27420f05b9f788473f4026c104b0bdfd5222 Mon Sep 17 00:00:00 2001 From: PJK Date: Thu, 12 Jan 2017 01:50:35 +0100 Subject: [PATCH 2/6] Halfs handling improvement --- src/cbor/encoding.c | 29 +++++++++++++++++++---------- test/type_7_encoders_test.c | 12 
+++++++++++- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c index 056c6f33..f8b942f7 100644 --- a/src/cbor/encoding.c +++ b/src/cbor/encoding.c @@ -133,21 +133,30 @@ size_t cbor_encode_half(float value, unsigned char *buffer, size_t buffer_size) /* Assuming value is normalized */ uint32_t val = ((union _cbor_float_helper) {.as_float = value}).as_uint; uint16_t res; - uint8_t exp = (val & 0x7F800000) >> 23; /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */ + uint8_t exp = (uint8_t) ((val & 0x7F800000) >> 23); /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */ uint32_t mant = val & 0x7FFFFF; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */ if (exp == 0xFF) { /* Infinity or NaNs */ - if (value != value) + if (value != value) { res = (uint16_t) 0x00e700; /* Not IEEE semantics - required by CBOR [s. 3.9] */ - else - res = (val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15; + } else { + res = (uint16_t) ((val & 0x80000000) >> 16 | 0x7C00 | (mant ? 
1 : 0) << 15); + } } else if (exp == 0x00) { /* Zeroes or subnorms */ - res = (val & 0x80000000) >> 16 | (uint16_t) (mant >> 13); + res = (uint16_t) ((val & 0x80000000) >> 16 | mant >> 13); } else { /* Normal numbers */ - exp -= 127; - if (((int8_t) exp) > 15 || ((int8_t) exp) < -14) - return 0; /* No way we can represent magnitude in normalized way */ - else - res = (val & 0x80000000) >> 16 | ((exp + 15) << 10) | (uint16_t) (mant >> 13); + int8_t logical_exp = (int8_t) (exp - 127); + assert(logical_exp == exp - 127); + + // Now we know that 2^exp <= 0 logically + if (logical_exp < -24) { + // TODO maybe handle in a different way and give some encoding + return 0; /* No unambiguous representation exists, this float is not a halft float */ + } else if (logical_exp < -14) { + /* Offset the remaining decimal places by shifting the significand, the value is lost */ + res = (uint16_t) (val & 0x80000000) >> 16 | (uint16_t) (1 << (24 + logical_exp)); + } else { + res = (uint16_t) ((val & 0x80000000) >> 16 | ((((uint8_t) logical_exp) + 15) << 10) | (uint16_t) (mant >> 13)); + } } return _cbor_encode_uint16(res, buffer, buffer_size, 0xE0); } diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c index 28625e24..b53e26fc 100644 --- a/test/type_7_encoders_test.c +++ b/test/type_7_encoders_test.c @@ -47,18 +47,28 @@ static void test_half(void **state) { assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x3E, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x80, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7B, 0xFF}), 3); + assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512)); 
assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x04, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x40, 0x00}), 3); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0xC4, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3); + + assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x02}), 3); + assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7C, 0x00}), 3); } From e962f4b325aa6fe5b8fb4f6fe0df97cc388d9f5e Mon Sep 17 00:00:00 2001 From: PJK Date: Thu, 12 Jan 2017 13:50:31 +0100 Subject: [PATCH 3/6] Better half floats encoding --- src/cbor/encoding.c | 9 ++++++--- test/type_7_encoders_test.c | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c index f8b942f7..bb636d82 100644 --- a/src/cbor/encoding.c +++ b/src/cbor/encoding.c @@ -149,10 +149,13 @@ size_t cbor_encode_half(float value, unsigned char *buffer, size_t buffer_size) // Now we know that 2^exp <= 0 logically if (logical_exp < -24) { - // TODO maybe handle in a different way and give some encoding - return 0; /* No unambiguous representation exists, this float is not a halft float */ + /* No unambiguous representation exists, this float is not a half float and is too small to + be represented using a half, round off to zero. Consistent with the reference implementation. */ + res = 0; } else if (logical_exp < -14) { - /* Offset the remaining decimal places by shifting the significand, the value is lost */ + /* Offset the remaining decimal places by shifting the significand, the value is lost. + This is an implementation decision that works around the absence of standard half-float + in the language. 
*/ res = (uint16_t) (val & 0x80000000) >> 16 | (uint16_t) (1 << (24 + logical_exp)); } else { res = (uint16_t) ((val & 0x80000000) >> 16 | ((((uint8_t) logical_exp) + 15) << 10) | (uint16_t) (mant >> 13)); diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c index b53e26fc..352d2b65 100644 --- a/test/type_7_encoders_test.c +++ b/test/type_7_encoders_test.c @@ -63,9 +63,14 @@ static void test_half(void **state) assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0xC4, 0x00}), 3); + /* Smallest representable value */ assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3); + /* Smaller than the smallest, round off to zero */ + assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3); + assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x02}), 3); From 8e32c0b22e085131c837356b6c409c5c2412e067 Mon Sep 17 00:00:00 2001 From: PJK Date: Thu, 12 Jan 2017 13:52:28 +0100 Subject: [PATCH 4/6] More tests on round-off behavior --- test/type_7_encoders_test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c index 352d2b65..95f65c41 100644 --- a/test/type_7_encoders_test.c +++ b/test/type_7_encoders_test.c @@ -67,8 +67,13 @@ static void test_half(void **state) assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3); - /* Smaller than the smallest, round off to zero */ + /* Smaller than the smallest, approximate magnitude representation */ assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 
0x01}), 3); + + /* Smaller than the smallest and even the magnitude cannot be represented, + round off to zero */ + assert_int_equal(3, cbor_encode_half(1e-25f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3); assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512)); From 9fe0a6542308cf8421a8c3c721a22f4c2f119a13 Mon Sep 17 00:00:00 2001 From: PJK Date: Thu, 12 Jan 2017 14:21:37 +0100 Subject: [PATCH 5/6] Update docs --- doc/source/api/type_7.rst | 13 +++++++++++++ src/cbor/encoding.h | 25 ++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/doc/source/api/type_7.rst b/doc/source/api/type_7.rst index f5a892a0..a0b13f4f 100644 --- a/doc/source/api/type_7.rst +++ b/doc/source/api/type_7.rst @@ -59,3 +59,16 @@ Manipulating existing items .. doxygenfunction:: cbor_set_float4 .. doxygenfunction:: cbor_set_float8 + +Half floats +~~~~~~~~~~~~ +CBOR supports two `bytes wide ("half-precision") `_ +floats which are not supported by the C language. *libcbor* represents them using ``float`` +values throughout the API, which has important implications when manipulating these values. + +In particular, if a user uses some of the manipulation APIs +(e.g. :func:`cbor_set_float2`, :func:`cbor_new_float2`) +to introduce a value that doesn't have an exact half-float representation, +the encoding semantics are given by :func:`cbor_encode_half` as follows: + +.. 
doxygenfunction:: cbor_encode_half diff --git a/src/cbor/encoding.h b/src/cbor/encoding.h index db1054ea..8117582b 100644 --- a/src/cbor/encoding.h +++ b/src/cbor/encoding.h @@ -21,7 +21,6 @@ extern "C" { * ============================================================================ */ -/** Primitive encoder */ size_t cbor_encode_uint8(uint8_t, unsigned char *, size_t); size_t cbor_encode_uint16(uint16_t, unsigned char *, size_t); @@ -66,6 +65,30 @@ size_t cbor_encode_null(unsigned char *, size_t); size_t cbor_encode_undef(unsigned char *, size_t); +/** Encodes a half-precision float + * + * Since there is no native representation or semantics for half floats + * in the language, we use single-precision floats, as every value that + * can be expressed as a half-float can also be expressed as a float. + * + * This however means that not all floats passed to this function can be + * unambiguously encoded. The behavior is as follows: + * - Infinity, NaN are preserved + * - Zero is preserved + * - Denormalized numbers keep their sign bit and 10 most significant bits of the significand + * - All other numbers + * - If the logical value of the exponent is < -24, the output is zero + * - If the logical value of the exponent is between -24 and -15 (inclusive), the output + * is cut off to represent the 'magnitude' of the input, by which we + * mean (-1)^{signbit} x 2^{exponent}. The value in the significand is lost. 
+ * - In all other cases, the sign bit, the exponent, and 10 most significant bits + * of the significand are kept + * + * @param value + * @param buffer Target buffer + * @param buffer_size Available space in the buffer + * @return number of bytes written + */ size_t cbor_encode_half(float, unsigned char *, size_t); size_t cbor_encode_single(float, unsigned char *, size_t); From 21c091479b70509eee33db18c0d220cbbad74961 Mon Sep 17 00:00:00 2001 From: PJK Date: Thu, 12 Jan 2017 14:33:46 +0100 Subject: [PATCH 6/6] Document changes --- CHANGELOG.md | 1 + doc/source/api/type_7.rst | 4 +++- doc/source/rfc_conformance.rst | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 732c8cd2..1cd213f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Next - Fixed Linux installation directory depending on architecture [#34] (by jvymazal) - Improved 32-bit support [#35] - Fixed MSVC compatibility [#31] +- Fixed and improved half-float encoding [#5] [#11] 0.4.0 (2015-12-25) --------------------- diff --git a/doc/source/api/type_7.rst b/doc/source/api/type_7.rst index a0b13f4f..c48d317b 100644 --- a/doc/source/api/type_7.rst +++ b/doc/source/api/type_7.rst @@ -60,10 +60,12 @@ Manipulating existing items .. doxygenfunction:: cbor_set_float8 +.. _api_type_7_hard_floats: + Half floats ~~~~~~~~~~~~ CBOR supports two `bytes wide ("half-precision") `_ -floats which are not supported by the C language. *libcbor* represents them using ``float`` +floats which are not supported by the C language. *libcbor* represents them using :type:`float` values throughout the API, which has important implications when manipulating these values. 
In particular, if a user uses some of the manipulation APIs diff --git a/doc/source/rfc_conformance.rst b/doc/source/rfc_conformance.rst index cbef7bed..8ec80bf0 100644 --- a/doc/source/rfc_conformance.rst +++ b/doc/source/rfc_conformance.rst @@ -11,5 +11,7 @@ There is no explicit limitation of indefinite length byte strings. [#]_ *libcbor "Half-precision" IEEE 754 floats --------------------------------- -As of C99 and even C11, there is no standard implementation for 2 bytes floats. *libcbor* packs them as a :type:`double`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode. +As of C99 and even C11, there is no standard implementation for 2-byte floats. *libcbor* packs them as a :type:`float`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies to both canonical and normal mode. + +For more information on half-float serialization, please refer to the section on :ref:`api_type_7_hard_floats`.