PJK · PJK · Jan 12, 2017 · Jan 8, 2017 · Jan 12, 2017 · Jan 12, 2017
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ Next
 - Fixed Linux installation directory depending on architecture [#34] (by jvymazal)
 - Improved 32-bit support [#35]
 - Fixed MSVC compatibility [#31]
+- Fixed and improved half-float encoding [#5] [#11]
 
 0.4.0 (2015-12-25)
 ---------------------

diff --git a/doc/source/api/type_7.rst b/doc/source/api/type_7.rst
@@ -59,3 +59,18 @@ Manipulating existing items
 .. doxygenfunction:: cbor_set_float4
 .. doxygenfunction:: cbor_set_float8
 
+
+.. _api_type_7_hard_floats:
+
+Half floats
+~~~~~~~~~~~~
+CBOR supports `two-byte ("half-precision") <https://en.wikipedia.org/wiki/Half-precision_floating-point_format>`_
+floats, which are not natively supported by the C language. *libcbor* represents them using :type:`float`
+values throughout the API, which has important implications when manipulating these values.
+
+In particular, if a user uses some of the manipulation APIs
+(e.g. :func:`cbor_set_float2`, :func:`cbor_new_float2`)
+to introduce a value that doesn't have an exact half-float representation,
+the encoding semantics are given by :func:`cbor_encode_half` as follows:
+
+.. doxygenfunction:: cbor_encode_half
diff --git a/doc/source/rfc_conformance.rst b/doc/source/rfc_conformance.rst
@@ -11,5 +11,7 @@ There is no explicit limitation of indefinite length byte strings. [#]_ *libcbor
 
 "Half-precision" IEEE 754 floats
 ---------------------------------
-As of C99 and even C11, there is no standard implementation for 2 bytes floats. *libcbor* packs them as a :type:`double`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode.
+As of C99 and even C11, there is no standard implementation for 2-byte floats. *libcbor* packs them as a :type:`float`. When encoding, *libcbor* selects the appropriate wire representation based on metadata and the actual value. This applies both to canonical and normal mode.
+
+For more information on half-float serialization, please refer to the section on :ref:`api_type_7_hard_floats`.
 
diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c
@@ -133,21 +133,33 @@ size_t cbor_encode_half(float value, unsigned char *buffer, size_t buffer_size)
 	/* Assuming value is normalized */
 	uint32_t val = ((union _cbor_float_helper) {.as_float = value}).as_uint;
 	uint16_t res;
-	uint8_t exp = (val & 0x7F800000) >> 23; /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */
+	uint8_t exp = (uint8_t) ((val & 0x7F800000) >> 23); /* 0b0111_1111_1000_0000_0000_0000_0000_0000 */
 	uint32_t mant = val & 0x7FFFFF; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */
 	if (exp == 0xFF) { /* Infinity or NaNs */
-		if (value != value)
+		if (value != value) {
 			res = (uint16_t) 0x00e700; /* Not IEEE semantics - required by CBOR [s. 3.9] */
-		else
-			res = (val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15;
+		} else {
+			res = (uint16_t) ((val & 0x80000000) >> 16 | 0x7C00 | (mant ? 1 : 0) << 15);
+		}
 	} else if (exp == 0x00) { /* Zeroes or subnorms */
-		res = (val & 0x80000000) >> 16 | (uint16_t) (mant >> 13);
+		res = (uint16_t) ((val & 0x80000000) >> 16 | mant >> 13);
 	} else { /* Normal numbers */
-		exp -= 127;
-		if (((int8_t) exp) > 15 || ((int8_t) exp) < -14)
-			return 0; /* No way we can represent magnitude in normalized way */
-		else
-			res = (val & 0x80000000) >> 16 | ((exp + 15) << 10) | (uint16_t) (mant >> 13);
+		int8_t logical_exp = (int8_t) (exp - 127);
+		assert(logical_exp == exp - 127);
+
+		// exp is in [1, 254] here, so logical_exp lies in [-126, 127] and fits into int8_t
+		if (logical_exp < -24) {
+			/* No unambiguous representation exists, this float is not a half float and is too small to
+			   be represented using a half, round off to zero. Consistent with the reference implementation. */
+			res = 0;
+		} else if (logical_exp < -14) {
+			/* Offset the remaining decimal places by shifting the significand, the value is lost.
+			   This is an implementation decision that works around the absence of standard half-float
+			   in the language. */
+			res = (uint16_t) (val & 0x80000000) >> 16 | (uint16_t) (1 << (24 + logical_exp));
+		} else {
+			res = (uint16_t) ((val & 0x80000000) >> 16 | ((((uint8_t) logical_exp) + 15) << 10) | (uint16_t) (mant >> 13));
+		}
 	}
 	return _cbor_encode_uint16(res, buffer, buffer_size, 0xE0);
 }

diff --git a/src/cbor/encoding.h b/src/cbor/encoding.h
@@ -21,7 +21,6 @@ extern "C" {
 * ============================================================================
 */
 
-/** Primitive encoder */
 size_t cbor_encode_uint8(uint8_t, unsigned char *, size_t);
 
 size_t cbor_encode_uint16(uint16_t, unsigned char *, size_t);
@@ -66,6 +65,30 @@ size_t cbor_encode_null(unsigned char *, size_t);
 
 size_t cbor_encode_undef(unsigned char *, size_t);
 
+/** Encodes a half-precision float
+ *
+ * Since there is no native representation or semantics for half floats
+ * in the language, we use single-precision floats, as every value that
+ * can be expressed as a half-float can also be expressed as a float.
+ *
+ * This however means that not all floats passed to this function can be
+ * unambiguously encoded. The behavior is as follows:
+ *  - Infinity, NaN are preserved
+ *  - Zero is preserved
+ *  - Denormalized numbers keep their sign bit and 10 most significant bits of the significand
+ *  - All other numbers
+ *   - If the logical value of the exponent is < -24, the output is zero
+ *   - If the logical value of the exponent is between -24 and -15 (inclusive), the output
+ *     is cut off to represent the 'magnitude' of the input, by which we
+ *     mean (-1)^{signbit} x 1.0 x 2^{exponent}. The value in the significand is lost.
+ *   - In all other cases, the sign bit, the exponent, and 10 most significant bits
+ *     of the significand are kept
+ *
+ * @param value Value to encode, passed as a single-precision float
+ * @param buffer Target buffer
+ * @param buffer_size Available space in the buffer
+ * @return number of bytes written
+ */
 size_t cbor_encode_half(float, unsigned char *, size_t);
 
 size_t cbor_encode_single(float, unsigned char *, size_t);

diff --git a/test/type_7_encoders_test.c b/test/type_7_encoders_test.c
@@ -45,14 +45,40 @@ static void test_break(void **state)
 
 static void test_half(void **state)
 {
-	assert_int_equal(3, cbor_encode_half(1.5, buffer, 512));
+	assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x3E, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x80, 0x00}), 3);
+
 	assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
-	assert_int_equal(3, cbor_encode_half(65504.0, buffer, 512));
+
+	assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7B, 0xFF}), 3);
+
+	assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x04, 0x00}), 3);
+
+	assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0xC4, 0x00}), 3);
+
+	/* Smallest representable value */
+	assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
+
+	/* Decimal literal just below 2^-24; presumably rounds to the same float as above (IEEE 754 binary32), hence the same encoding */
+	assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x01}), 3);
+
+	/* Smaller than the smallest and even the magnitude cannot be represented,
+	   round off to zero */
+	assert_int_equal(3, cbor_encode_half(1e-25f, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x00}), 3);
+
+	assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));
+	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x00, 0x02}), 3);
+
 	assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512));
 	assert_memory_equal(buffer, ((unsigned char[]) {0xF9, 0x7C, 0x00}), 3);
 }