zephyrproject-rtos · nashif · Sep 8, 2025 · Sep 2, 2025 · Sep 4, 2025 · Sep 3, 2025
diff --git a/doc/releases/migration-guide-4.3.rst b/doc/releases/migration-guide-4.3.rst
@@ -26,6 +26,13 @@ Build System
 Kernel
 ******
 
+Base Libraries
+**************
+
+* UTF-8 utils declarations (:c:func:`utf8_trunc`, :c:func:`utf8_lcpy`,
+  :c:func:`utf8_count_chars`) have been moved from ``util.h`` to a separate
+  :zephyr_file:`include/zephyr/sys/util_utf8.h` file.
+
 Boards
 ******
 

diff --git a/include/zephyr/sys/util.h b/include/zephyr/sys/util.h
@@ -29,8 +29,6 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
-#include <sys/types.h>
-
 
 /** @brief Number of bits that make up a type */
 #define NUM_BITS(t) (sizeof(t) * BITS_PER_BYTE)
@@ -647,63 +645,6 @@ static inline int64_t sign_extend_64(uint64_t value, uint8_t index)
 	return (int64_t)(value << shift) >> shift;
 }
 
-/**
- * @brief Properly truncate a NULL-terminated UTF-8 string
- *
- * Take a NULL-terminated UTF-8 string and ensure that if the string has been
- * truncated (by setting the NULL terminator) earlier by other means, that
- * the string ends with a properly formatted UTF-8 character (1-4 bytes).
- *
- * Example:
- *
- * @code{.c}
- *      char test_str[] = "€€€";
- *      char trunc_utf8[8];
- *
- *      printf("Original : %s\n", test_str); // €€€
- *      strncpy(trunc_utf8, test_str, sizeof(trunc_utf8));
- *      trunc_utf8[sizeof(trunc_utf8) - 1] = '\0';
- *      printf("Bad      : %s\n", trunc_utf8); // €€�
- *      utf8_trunc(trunc_utf8);
- *      printf("Truncated: %s\n", trunc_utf8); // €€
- * @endcode
- *
- * @param utf8_str NULL-terminated string
- *
- * @return Pointer to the @p utf8_str
- */
-char *utf8_trunc(char *utf8_str);
-
-/**
- * @brief Copies a UTF-8 encoded string from @p src to @p dst
- *
- * The resulting @p dst will always be NULL terminated if @p n is larger than 0,
- * and the @p dst string will always be properly UTF-8 truncated.
- *
- * @param dst The destination of the UTF-8 string.
- * @param src The source string
- * @param n   The size of the @p dst buffer. Maximum number of characters copied
- *            is @p n - 1. If 0 nothing will be done, and the @p dst will not be
- *            NULL terminated.
- *
- * @return Pointer to the @p dst
- */
-char *utf8_lcpy(char *dst, const char *src, size_t n);
-
-/**
- * @brief Counts the characters in a UTF-8 encoded string @p s
- *
- * Counts the number of UTF-8 characters (code points) in a null-terminated string.
- * This function steps through each UTF-8 sequence by checking leading byte patterns.
- * It does not fully validate UTF-8 correctness, only counts characters.
- *
- * @param s The input string
- *
- * @return Number of UTF-8 characters in @p s on success or (negative) error code
- *  otherwise.
- */
-ssize_t utf8_count_chars(const char *s);
-
 #define __z_log2d(x) (32 - __builtin_clz(x) - 1)
 #define __z_log2q(x) (64 - __builtin_clzll(x) - 1)
 #define __z_log2(x) (sizeof(__typeof__(x)) > 4 ? __z_log2q(x) : __z_log2d(x))

diff --git a/include/zephyr/sys/util_utf8.h b/include/zephyr/sys/util_utf8.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright The Zephyr Project Contributors
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file
+ * @brief UTF-8 utilities
+ *
+ * Misc UTF-8 utilities.
+ */
+
+#ifndef ZEPHYR_INCLUDE_SYS_UTIL_UTF8_H_
+#define ZEPHYR_INCLUDE_SYS_UTIL_UTF8_H_
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup sys-util
+ * @{
+ */
+
+/**
+ * @brief Properly truncate a NULL-terminated UTF-8 string
+ *
+ * Take a NULL-terminated UTF-8 string and ensure that if the string has been
+ * truncated (by setting the NULL terminator) earlier by other means, that
+ * the string ends with a properly formatted UTF-8 character (1-4 bytes).
+ *
+ * Example:
+ *
+ * @code{.c}
+ *      char test_str[] = "€€€";
+ *      char trunc_utf8[8];
+ *
+ *      printf("Original : %s\n", test_str); // €€€
+ *      strncpy(trunc_utf8, test_str, sizeof(trunc_utf8));
+ *      trunc_utf8[sizeof(trunc_utf8) - 1] = '\0';
+ *      printf("Bad      : %s\n", trunc_utf8); // €€�
+ *      utf8_trunc(trunc_utf8);
+ *      printf("Truncated: %s\n", trunc_utf8); // €€
+ * @endcode
+ *
+ * @param utf8_str NULL-terminated string
+ *
+ * @return Pointer to the @p utf8_str
+ */
+char *utf8_trunc(char *utf8_str);
+
+/**
+ * @brief Copies a UTF-8 encoded string from @p src to @p dst
+ *
+ * The resulting @p dst will always be NULL terminated if @p n is larger than 0,
+ * and the @p dst string will always be properly UTF-8 truncated.
+ *
+ * @param dst The destination of the UTF-8 string.
+ * @param src The source string
+ * @param n   The size of the @p dst buffer. Maximum number of characters copied
+ *            is @p n - 1. If 0 nothing will be done, and the @p dst will not be
+ *            NULL terminated.
+ *
+ * @return Pointer to the @p dst
+ */
+char *utf8_lcpy(char *dst, const char *src, size_t n);
+
+/**
+ * @brief Counts the characters in a UTF-8 encoded string @p s
+ *
+ * Counts the number of UTF-8 characters (code points) in a null-terminated string.
+ * This function steps through each UTF-8 sequence by checking leading byte patterns.
+ * It does not fully validate UTF-8 correctness, only counts characters.
+ *
+ * @param s The input string
+ *
+ * @return Number of UTF-8 characters in @p s on success or (negative) error code
+ *  otherwise.
+ */
+int utf8_count_chars(const char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @}
+ */
+
+#endif /* ZEPHYR_INCLUDE_SYS_UTIL_UTF8_H_ */
diff --git a/lib/utils/utf8.c b/lib/utils/utf8.c
@@ -8,7 +8,7 @@
 #include <string.h>
 #include <zephyr/sys/__assert.h>
 #include <errno.h>
-#include <sys/types.h>
+#include <zephyr/sys/util_utf8.h>
 
 #define ASCII_CHAR 0x7F
 #define SEQUENCE_FIRST_MASK 0xC0
@@ -83,9 +83,9 @@ char *utf8_lcpy(char *dst, const char *src, size_t n)
 	return dst;
 }
 
-ssize_t utf8_count_chars(const char *s)
+int utf8_count_chars(const char *s)
 {
-	ssize_t count = 0;
+	int count = 0;
 	const char *p = s; /* getting a pointer to increment */
 
 	while (*p != '\0') {

diff --git a/subsys/bluetooth/audio/aics.c b/subsys/bluetooth/audio/aics.c
@@ -27,6 +27,7 @@
 #include <zephyr/sys/check.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 #include <zephyr/sys_clock.h>
 
 #include "aics_internal.h"

diff --git a/subsys/bluetooth/audio/ccp_call_control_server.c b/subsys/bluetooth/audio/ccp_call_control_server.c
@@ -16,6 +16,7 @@
 #include <zephyr/logging/log.h>
 #include <zephyr/sys/check.h>
 #include <zephyr/sys/util.h>
+#include <zephyr/sys/util_utf8.h>
 
 LOG_MODULE_REGISTER(bt_ccp_call_control_server, CONFIG_BT_CCP_CALL_CONTROL_SERVER_LOG_LEVEL);
 

diff --git a/subsys/bluetooth/audio/has.c b/subsys/bluetooth/audio/has.c
@@ -31,6 +31,7 @@
 #include <zephyr/sys/slist.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 #include <zephyr/sys_clock.h>
 #include <zephyr/toolchain.h>
 

diff --git a/subsys/bluetooth/audio/has_client.c b/subsys/bluetooth/audio/has_client.c
@@ -23,6 +23,7 @@
 #include <zephyr/sys/check.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 
 #include "has_internal.h"
 

diff --git a/subsys/bluetooth/audio/shell/bap_broadcast_assistant.c b/subsys/bluetooth/audio/shell/bap_broadcast_assistant.c
@@ -30,6 +30,7 @@
 #include <zephyr/sys/byteorder.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 #include <zephyr/types.h>
 
 #include "common/bt_shell_private.h"

diff --git a/subsys/bluetooth/audio/tbs.c b/subsys/bluetooth/audio/tbs.c
@@ -33,6 +33,7 @@
 #include <zephyr/sys/check.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 #include <zephyr/types.h>
 
 #include "audio_internal.h"

diff --git a/subsys/bluetooth/audio/tbs_client.c b/subsys/bluetooth/audio/tbs_client.c
@@ -28,6 +28,7 @@
 #include <zephyr/sys/slist.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 #include <zephyr/toolchain.h>
 #include <zephyr/types.h>
 #include <zephyr/sys/check.h>

diff --git a/subsys/bluetooth/audio/vocs.c b/subsys/bluetooth/audio/vocs.c
@@ -29,6 +29,7 @@
 #include <zephyr/sys/check.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/sys/util_macro.h>
+#include <zephyr/sys/util_utf8.h>
 #include <zephyr/sys_clock.h>
 
 #include "audio_internal.h"

diff --git a/tests/posix/headers/src/types_h.c b/tests/posix/headers/src/types_h.c
@@ -0,0 +1,12 @@
+/*
+ * Copyright The Zephyr Project Contributors
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/* _GNU_SOURCE causes extra headers to be included and can cause dependency
+ * loops
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
diff --git a/tests/unit/util/main.c b/tests/unit/util/main.c
@@ -5,7 +5,7 @@
  */
 
 #include <zephyr/ztest.h>
-#include <zephyr/sys/util.h>
+#include <zephyr/sys/util_utf8.h>
 #include <stdio.h>
 #include <string.h>
 
@@ -1008,24 +1008,24 @@ ZTEST(util, test_utf8_lcpy_null_termination)
 ZTEST(util, test_utf8_count_chars_ASCII)
 {
 	const char *test_str = "I have 15 char.";
-	ssize_t count = utf8_count_chars(test_str);
+	int count = utf8_count_chars(test_str);
 
 	zassert_equal(count, 15, "Failed to count ASCII");
 }
 
 ZTEST(util, test_utf8_count_chars_non_ASCII)
 {
 	const char *test_str = "Hello دنیا!🌍";
-	ssize_t count = utf8_count_chars(test_str);
+	int count = utf8_count_chars(test_str);
 
 	zassert_equal(count, 12, "Failed to count non-ASCII");
 }
 
 ZTEST(util, test_utf8_count_chars_invalid_utf)
 {
 	const char test_str[] = { (char)0x80, 0x00 };
-	ssize_t count = utf8_count_chars(test_str);
-	ssize_t expected_result = -EINVAL;
+	int count = utf8_count_chars(test_str);
+	int expected_result = -EINVAL;
 
 	zassert_equal(count, expected_result, "Failed to detect invalid UTF");
 }