From 075cdbf798145d2ebb6cefab3e684ade50521a22 Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Sun, 8 Jan 2017 16:56:06 +0100
Subject: [PATCH 1/6] Add more RFC tests

---
 test/type_7_encoders_test.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c
index 357b0f88..28625e24 100644
--- a/test/type_7_encoders_test.c
+++ b/test/type_7_encoders_test.c
@@ -45,14 +45,20 @@ static void test_break(void **state)
 
 static void test_half(void **state)
 {
-	assert_int_equal(3, cbor_encode_half(1.5, buffer, 512));
+	assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x3E, 0x00}), 3);
 	assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x80, 0x00}), 3);
 	assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
-	assert_int_equal(3, cbor_encode_half(65504.0, buffer, 512));
+	assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7B, 0xFF}), 3);
+	assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x04, 0x00}), 3);
+	assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x40, 0x00}), 3);
+	assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
 	assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7C, 0x00}), 3);
 }

From 703f27420f05b9f788473f4026c104b0bdfd5222 Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Thu, 12 Jan 2017 01:50:35 +0100
Subject: [PATCH 2/6] Halfs handling improvement

---
 src/cbor/encoding.c         | 29 +++++++++++++++++++----------
 test/type_7_encoders_test.c | 12 +++++++++++-
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c
index 056c6f33..f8b942f7 100644
--- a/src/cbor/encoding.c
+++ b/src/cbor/encoding.c
@@ -133,21 +133,30 @@ size_t cbor_encode_half(float value, unsigned char *buffer, size_t buffer_size)
 	/* Assuming value is normalized */
 	uint32_t val = ((union _cbor_float_helper) {.as_float = value}).as_uint;
 	uint16_t res;
-	uint8_t exp = (val & 0x7F800000) >> 23; /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */
+	uint8_t exp = (uint8_t) ((val & 0x7F800000) >> 23); /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */
 	uint32_t mant = val & 0x7FFFFF; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */
 	if (exp == 0xFF) { /* Infinity or NaNs */
-		if (value != value)
+		if (value != value) {
 			res = (uint16_t) 0x00e700; /* Not IEEE semantics - required by CBOR [s. 3.9] */
-		else
-			res = (val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15;
+		} else {
+			res = (uint16_t) ((val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15);
+		}
 	} else if (exp == 0x00) { /* Zeroes or subnorms */
-		res = (val & 0x80000000) >> 16 | (uint16_t) (mant >> 13);
+		res = (uint16_t) ((val & 0x80000000) >> 16 | mant >> 13);
 	} else { /* Normal numbers */
-		exp -= 127;
-		if (((int8_t) exp) > 15 || ((int8_t) exp) < -14)
-			return 0; /* No way we can represent magnitude in normalized way */
-		else
-			res = (val & 0x80000000) >> 16 | ((exp + 15) << 10) | (uint16_t) (mant >> 13);
+		int8_t logical_exp = (int8_t) (exp - 127);
+		assert(logical_exp == exp - 127);
+
+		// Now we know that 2^exp <= 0 logically
+		if (logical_exp < -24) {
+			// TODO maybe handle in a different way and give some encoding
+			return 0; /* No unambiguous representation exists, this float is not a halft float */
+		} else if (logical_exp < -14) {
+			/* Offset the remaining decimal places by shifting the significand, the value is lost */
+			res = (uint16_t) (val & 0x80000000) >> 16 | (uint16_t) (1 << (24 + logical_exp));
+		} else {
+			res = (uint16_t) ((val & 0x80000000) >> 16 | ((((uint8_t) logical_exp) + 15) << 10) | (uint16_t) (mant >> 13));
+		}
 	}
 	return _cbor_encode_uint16(res, buffer, buffer_size, 0xE0);
 }
diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c
index 28625e24..b53e26fc 100644
--- a/test/type_7_encoders_test.c
+++ b/test/type_7_encoders_test.c
@@ -47,18 +47,28 @@ static void test_half(void **state)
 {
 	assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x3E, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x80, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7B, 0xFF}), 3);
+
 	assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x04, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512));
-	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x40, 0x00}), 3);
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0xC4, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
+
+	assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x02}), 3);
+
 	assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7C, 0x00}), 3);
 }

From e962f4b325aa6fe5b8fb4f6fe0df97cc388d9f5e Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Thu, 12 Jan 2017 13:50:31 +0100
Subject: [PATCH 3/6] Better half floats encoding

---
 src/cbor/encoding.c         | 9 ++++++---
 test/type_7_encoders_test.c | 5 +++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c
index f8b942f7..bb636d82 100644
--- a/src/cbor/encoding.c
+++ b/src/cbor/encoding.c
@@ -149,10 +149,13 @@ size_t cbor_encode_half(float value, unsigned char *buffer, size_t buffer_size)
 
 		// Now we know that 2^exp <= 0 logically
 		if (logical_exp < -24) {
-			// TODO maybe handle in a different way and give some encoding
-			return 0; /* No unambiguous representation exists, this float is not a halft float */
+			/* No unambiguous representation exists, this float is not a half float and is too small to
+			   be represented using a half, round off to zero. Consistent with the reference implementation. */
+			res = 0;
 		} else if (logical_exp < -14) {
-			/* Offset the remaining decimal places by shifting the significand, the value is lost */
+			/* Offset the remaining decimal places by shifting the significand, the value is lost.
+			   This is an implementation decision that works around the absence of standard half-float
+			   in the language. */
 			res = (uint16_t) (val & 0x80000000) >> 16 | (uint16_t) (1 << (24 + logical_exp));
 		} else {
 			res = (uint16_t) ((val & 0x80000000) >> 16 | ((((uint8_t) logical_exp) + 15) << 10) | (uint16_t) (mant >> 13));
diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c
index b53e26fc..352d2b65 100644
--- a/test/type_7_encoders_test.c
+++ b/test/type_7_encoders_test.c
@@ -63,9 +63,14 @@ static void test_half(void **state)
 	assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0xC4, 0x00}), 3);
 
+	/* Smallest representable value */
 	assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
 
+	/* Smaller than the smallest, round off to zero */
+	assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x02}), 3);
 

From 8e32c0b22e085131c837356b6c409c5c2412e067 Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Thu, 12 Jan 2017 13:52:28 +0100
Subject: [PATCH 4/6] More tests on round-off behavior

---
 test/type_7_encoders_test.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c
index 352d2b65..95f65c41 100644
--- a/test/type_7_encoders_test.c
+++ b/test/type_7_encoders_test.c
@@ -67,8 +67,13 @@ static void test_half(void **state)
 	assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
 
-	/* Smaller than the smallest, round off to zero */
+	/* Smaller than the smallest, approximate magnitude representation */
 	assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
+
+	/* Smaller than the smallest and even the magnitude cannot be represented,
+	   round off to zero */
+	assert_int_equal(3, cbor_encode_half(1e-25f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
 
 	assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));

From 9fe0a6542308cf8421a8c3c721a22f4c2f119a13 Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Thu, 12 Jan 2017 14:21:37 +0100
Subject: [PATCH 5/6] Update docs

---
 doc/source/api/type_7.rst | 13 +++++++++++++
 src/cbor/encoding.h       | 25 ++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/doc/source/api/type_7.rst b/doc/source/api/type_7.rst
index f5a892a0..a0b13f4f 100644
--- a/doc/source/api/type_7.rst
+++ b/doc/source/api/type_7.rst
@@ -59,3 +59,16 @@ Manipulating existing items
 .. doxygenfunction:: cbor_set_float4
 .. doxygenfunction:: cbor_set_float8
 
+
+Half floats
+~~~~~~~~~~~~
+CBOR supports two `bytes wide ("half-precision") <https://en.wikipedia.org/wiki/Half-precision_floating-point_format>`_
+floats which are not supported by the C language. *libcbor* represents them using ``float``
+values throughout the API, which has important implications when manipulating these values.
+
+In particular, if a user uses some of the manipulation APIs
+(e.g. :func:`cbor_set_float2`, :func:`cbor_new_float2`)
+to introduce a value that doesn't have an exact half-float representation,
+the encoding semantics are given by :func:`cbor_encode_half` as follows:
+
+.. doxygenfunction:: cbor_encode_half
diff --git a/src/cbor/encoding.h b/src/cbor/encoding.h
index db1054ea..8117582b 100644
--- a/src/cbor/encoding.h
+++ b/src/cbor/encoding.h
@@ -21,7 +21,6 @@ extern "C" {
 * ============================================================================
 */
 
-/** Primitive encoder */
 size_t cbor_encode_uint8(uint8_t, unsigned char *, size_t);
 
 size_t cbor_encode_uint16(uint16_t, unsigned char *, size_t);
@@ -66,6 +65,30 @@ size_t cbor_encode_null(unsigned char *, size_t);
 
 size_t cbor_encode_undef(unsigned char *, size_t);
 
+/** Encodes a half-precision float
+ *
+ * Since there is no native representation or semantics for half floats
+ * in the language, we use single-precision floats, as every value that
+ * can be expressed as a half-float can also be expressed as a float.
+ *
+ * This however means that not all floats passed to this function can be
+ * unambiguously encoded. The behavior is as follows:
+ *  - Infinity, NaN are preserved
+ *  - Zero is preserved
+ *  - Denormalized numbers keep their sign bit and the 10 most significant bits of the significand
+ *  - All other numbers
+ *   - If the logical value of the exponent is < -24, the output is zero
+ *   - If the logical value of the exponent is between -24 and -15, the output
+ *     is cut off to represent the 'magnitude' of the input, by which we
+ *     mean (-1)^{signbit} x 2^{exponent}. The value in the significand is lost.
+ *   - In all other cases, the sign bit, the exponent, and 10 most significant bits
+ *     of the significand are kept
+ *
+ * @param value
+ * @param buffer Target buffer
+ * @param buffer_size Available space in the buffer
+ * @return number of bytes written
+ */
 size_t cbor_encode_half(float, unsigned char *, size_t);
 
 size_t cbor_encode_single(float, unsigned char *, size_t);

From 21c091479b70509eee33db18c0d220cbbad74961 Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Thu, 12 Jan 2017 14:33:46 +0100
Subject: [PATCH 6/6] Document changes

---
 CHANGELOG.md                   | 1 +
 doc/source/api/type_7.rst      | 4 +++-
 doc/source/rfc_conformance.rst | 4 +++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 732c8cd2..1cd213f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Next
 - Fixed Linux installation directory depending on architecture [#34] (by jvymazal)
 - Improved 32-bit support [#35]
 - Fixed MSVC compatibility [#31]
+- Fixed and improved half-float encoding [#5] [#11]
 
 0.4.0 (2015-12-25)
 ---------------------
diff --git a/doc/source/api/type_7.rst b/doc/source/api/type_7.rst
index a0b13f4f..c48d317b 100644
--- a/doc/source/api/type_7.rst
+++ b/doc/source/api/type_7.rst
@@ -60,10 +60,12 @@ Manipulating existing items
 .. doxygenfunction:: cbor_set_float8
 
 
+.. _api_type_7_hard_floats:
+
 Half floats
 ~~~~~~~~~~~~
 CBOR supports two `bytes wide ("half-precision") <https://en.wikipedia.org/wiki/Half-precision_floating-point_format>`_
-floats which are not supported by the C language. *libcbor* represents them using ``float``
+floats which are not supported by the C language. *libcbor* represents them using :type:`float`
 values throughout the API, which has important implications when manipulating these values.
 
 In particular, if a user uses some of the manipulation APIs
diff --git a/doc/source/rfc_conformance.rst b/doc/source/rfc_conformance.rst
index cbef7bed..8ec80bf0 100644
--- a/doc/source/rfc_conformance.rst
+++ b/doc/source/rfc_conformance.rst
@@ -11,5 +11,7 @@ There is no explicit limitation of indefinite length byte strings. [#]_ *libcbor
 
 "Half-precision" IEEE 754 floats
 ---------------------------------
-As of C99 and even C11, there is no standard implementation for 2 bytes floats. *libcbor* packs them as a :type:`double`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode.
+As of C99 and even C11, there is no standard implementation for 2-byte floats. *libcbor* packs them as a :type:`float`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode.
+
+For more information on half-float serialization, please refer to the section on :ref:`api_type_7_hard_floats`.