Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/releases/migration-guide-4.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ Build System
Kernel
******

Base Libraries
**************

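* UTF-8 utils declarations (:c:func:`utf8_trunc`, :c:func:`utf8_lcpy`,
  :c:func:`utf8_count_chars`) have been moved from ``util.h`` to a separate
  :zephyr_file:`include/zephyr/sys/util_utf8.h` file, which must now be included
  explicitly. :c:func:`utf8_count_chars` now returns ``int`` instead of ``ssize_t``.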

Boards
******

Expand Down
59 changes: 0 additions & 59 deletions include/zephyr/sys/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>


/** @brief Number of bits that make up a type */
#define NUM_BITS(t) (sizeof(t) * BITS_PER_BYTE)
Expand Down Expand Up @@ -647,63 +645,6 @@ static inline int64_t sign_extend_64(uint64_t value, uint8_t index)
return (int64_t)(value << shift) >> shift;
}

/**
* @brief Properly truncate a NULL-terminated UTF-8 string
*
* Take a NULL-terminated UTF-8 string and ensure that if the string has been
* truncated (by setting the NULL terminator) earlier by other means, that
* the string ends with a properly formatted UTF-8 character (1-4 bytes).
*
* Example:
*
* @code{.c}
* char test_str[] = "€€€";
* char trunc_utf8[8];
*
* printf("Original : %s\n", test_str); // €€€
* strncpy(trunc_utf8, test_str, sizeof(trunc_utf8));
* trunc_utf8[sizeof(trunc_utf8) - 1] = '\0';
* printf("Bad : %s\n", trunc_utf8); // €€�
* utf8_trunc(trunc_utf8);
* printf("Truncated: %s\n", trunc_utf8); // €€
* @endcode
*
* @param utf8_str NULL-terminated string
*
* @return Pointer to the @p utf8_str
*/
char *utf8_trunc(char *utf8_str);

/**
* @brief Copies a UTF-8 encoded string from @p src to @p dst
*
* The resulting @p dst will always be NULL terminated if @p n is larger than 0,
* and the @p dst string will always be properly UTF-8 truncated.
*
* @param dst The destination of the UTF-8 string.
* @param src The source string
* @param n The size of the @p dst buffer. Maximum number of characters copied
* is @p n - 1. If 0 nothing will be done, and the @p dst will not be
* NULL terminated.
*
* @return Pointer to the @p dst
*/
char *utf8_lcpy(char *dst, const char *src, size_t n);

/**
* @brief Counts the characters in a UTF-8 encoded string @p s
*
* Counts the number of UTF-8 characters (code points) in a null-terminated string.
* This function steps through each UTF-8 sequence by checking leading byte patterns.
* It does not fully validate UTF-8 correctness, only counts characters.
*
* @param s The input string
*
* @return Number of UTF-8 characters in @p s on success or (negative) error code
* otherwise.
*/
ssize_t utf8_count_chars(const char *s);

#define __z_log2d(x) (32 - __builtin_clz(x) - 1)
#define __z_log2q(x) (64 - __builtin_clzll(x) - 1)
#define __z_log2(x) (sizeof(__typeof__(x)) > 4 ? __z_log2q(x) : __z_log2d(x))
Expand Down
93 changes: 93 additions & 0 deletions include/zephyr/sys/util_utf8.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
 * Copyright The Zephyr Project Contributors
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief UTF-8 utilities
 *
 * Misc UTF-8 utilities.
 */

#ifndef ZEPHYR_INCLUDE_SYS_UTIL_UTF8_H_
#define ZEPHYR_INCLUDE_SYS_UTIL_UTF8_H_

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @addtogroup sys-util
 * @{
 */

/**
 * @brief Properly truncate a NULL-terminated UTF-8 string
 *
 * Take a NULL-terminated UTF-8 string and ensure that if the string has been
 * truncated (by setting the NULL terminator) earlier by other means, that
 * the string ends with a properly formatted UTF-8 character (1-4 bytes).
 *
 * Example:
 *
 * @code{.c}
 *      char test_str[] = "€€€";
 *      char trunc_utf8[8];
 *
 *      printf("Original : %s\n", test_str); // €€€
 *      strncpy(trunc_utf8, test_str, sizeof(trunc_utf8));
 *      trunc_utf8[sizeof(trunc_utf8) - 1] = '\0';
 *      printf("Bad      : %s\n", trunc_utf8); // €€�
 *      utf8_trunc(trunc_utf8);
 *      printf("Truncated: %s\n", trunc_utf8); // €€
 * @endcode
 *
 * @param utf8_str NULL-terminated string
 *
 * @return Pointer to the @p utf8_str
 */
char *utf8_trunc(char *utf8_str);

/**
 * @brief Copies a UTF-8 encoded string from @p src to @p dst
 *
 * The resulting @p dst will always be NULL terminated if @p n is larger than 0,
 * and the @p dst string will always be properly UTF-8 truncated.
 *
 * @param dst The destination of the UTF-8 string.
 * @param src The source string
 * @param n   The size of the @p dst buffer in bytes. Since one byte is reserved
 *            for the NULL terminator, at most @p n - 1 bytes are copied (a
 *            UTF-8 character may occupy 1-4 bytes). If 0 nothing will be done,
 *            and the @p dst will not be NULL terminated.
 *
 * @return Pointer to the @p dst
 */
char *utf8_lcpy(char *dst, const char *src, size_t n);

/**
 * @brief Counts the characters in a UTF-8 encoded string @p s
 *
 * Counts the number of UTF-8 characters (code points) in a null-terminated string.
 * This function steps through each UTF-8 sequence by checking leading byte patterns.
 * It does not fully validate UTF-8 correctness, only counts characters.
 *
 * @param s The input string
 *
 * @return Number of UTF-8 characters in @p s on success or (negative) error code
 *         otherwise.
 */
int utf8_count_chars(const char *s);

/**
 * @}
 */

#ifdef __cplusplus
}
#endif

#endif /* ZEPHYR_INCLUDE_SYS_UTIL_UTF8_H_ */
6 changes: 3 additions & 3 deletions lib/utils/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <string.h>
#include <zephyr/sys/__assert.h>
#include <errno.h>
#include <sys/types.h>
#include <zephyr/sys/util_utf8.h>

#define ASCII_CHAR 0x7F
#define SEQUENCE_FIRST_MASK 0xC0
Expand Down Expand Up @@ -83,9 +83,9 @@ char *utf8_lcpy(char *dst, const char *src, size_t n)
return dst;
}

ssize_t utf8_count_chars(const char *s)
int utf8_count_chars(const char *s)
{
ssize_t count = 0;
int count = 0;
const char *p = s; /* getting a pointer to increment */

while (*p != '\0') {
Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/aics.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <zephyr/sys/check.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>
#include <zephyr/sys_clock.h>

#include "aics_internal.h"
Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/ccp_call_control_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <zephyr/logging/log.h>
#include <zephyr/sys/check.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_utf8.h>

LOG_MODULE_REGISTER(bt_ccp_call_control_server, CONFIG_BT_CCP_CALL_CONTROL_SERVER_LOG_LEVEL);

Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/has.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <zephyr/sys/slist.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>
#include <zephyr/sys_clock.h>
#include <zephyr/toolchain.h>

Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/has_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <zephyr/sys/check.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>

#include "has_internal.h"

Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/shell/bap_broadcast_assistant.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <zephyr/sys/byteorder.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>
#include <zephyr/types.h>

#include "common/bt_shell_private.h"
Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/tbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <zephyr/sys/check.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>
#include <zephyr/types.h>

#include "audio_internal.h"
Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/tbs_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <zephyr/sys/slist.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>
#include <zephyr/toolchain.h>
#include <zephyr/types.h>
#include <zephyr/sys/check.h>
Expand Down
1 change: 1 addition & 0 deletions subsys/bluetooth/audio/vocs.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <zephyr/sys/check.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_macro.h>
#include <zephyr/sys/util_utf8.h>
#include <zephyr/sys_clock.h>

#include "audio_internal.h"
Expand Down
12 changes: 12 additions & 0 deletions tests/posix/headers/src/types_h.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright The Zephyr Project Contributors
*
* SPDX-License-Identifier: Apache-2.0
*/

/* _GNU_SOURCE causes extra headers to be included and can cause dependency
* loops
*/
#define _GNU_SOURCE

Check failure on line 10 in tests/posix/headers/src/types_h.c

View workflow job for this annotation

GitHub Actions / Run compliance checks on patch series (PR)

API_DEFINE

tests/posix/headers/src/types_h.c:10 do not specify non-standard feature test macros for embedded code

#include <sys/types.h>
10 changes: 5 additions & 5 deletions tests/unit/util/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*/

#include <zephyr/ztest.h>
#include <zephyr/sys/util.h>
#include <zephyr/sys/util_utf8.h>
#include <stdio.h>
#include <string.h>

Expand Down Expand Up @@ -1008,24 +1008,24 @@ ZTEST(util, test_utf8_lcpy_null_termination)
ZTEST(util, test_utf8_count_chars_ASCII)
{
const char *test_str = "I have 15 char.";
ssize_t count = utf8_count_chars(test_str);
int count = utf8_count_chars(test_str);

zassert_equal(count, 15, "Failed to count ASCII");
}

ZTEST(util, test_utf8_count_chars_non_ASCII)
{
const char *test_str = "Hello دنیا!🌍";
ssize_t count = utf8_count_chars(test_str);
int count = utf8_count_chars(test_str);

zassert_equal(count, 12, "Failed to count non-ASCII");
}

ZTEST(util, test_utf8_count_chars_invalid_utf)
{
const char test_str[] = { (char)0x80, 0x00 };
ssize_t count = utf8_count_chars(test_str);
ssize_t expected_result = -EINVAL;
int count = utf8_count_chars(test_str);
int expected_result = -EINVAL;

zassert_equal(count, expected_result, "Failed to detect invalid UTF");
}
Expand Down
Loading