From 26eea2f63594208f71f49fd0874a551acbce8558 Mon Sep 17 00:00:00 2001 From: Bret Ambrose Date: Wed, 23 Aug 2023 10:24:37 -0700 Subject: [PATCH] Part 1 - refactor utf8 utilities to shared --- include/aws/mqtt/mqtt.h | 9 ++ include/aws/mqtt/private/v5/mqtt5_utils.h | 8 -- include/aws/mqtt/v5/mqtt5_client.h | 12 -- source/mqtt.c | 36 ++++- source/v5/mqtt5_options_storage.c | 22 +-- source/v5/mqtt5_utils.c | 53 -------- tests/CMakeLists.txt | 4 +- tests/shared_utils.c | 158 ++++++++++++++++++++++ tests/v5/mqtt5_utils_tests.c | 149 -------------------- 9 files changed, 215 insertions(+), 236 deletions(-) create mode 100644 tests/shared_utils.c diff --git a/include/aws/mqtt/mqtt.h b/include/aws/mqtt/mqtt.h index 7385b772..9fc8001c 100644 --- a/include/aws/mqtt/mqtt.h +++ b/include/aws/mqtt/mqtt.h @@ -101,9 +101,18 @@ AWS_EXTERN_C_BEGIN AWS_MQTT_API bool aws_mqtt_is_valid_topic(const struct aws_byte_cursor *topic); + AWS_MQTT_API bool aws_mqtt_is_valid_topic_filter(const struct aws_byte_cursor *topic_filter); +/** + * Validates a UTF-8 string against the MQTT specification + * + * @param text the UTF-8 text to validate + * @return AWS_OP_SUCCESS if the text is valid, otherwise AWS_OP_ERR + */ +AWS_MQTT_API int aws_mqtt_validate_utf8_text(struct aws_byte_cursor text); + /** * Initializes internal datastructures used by aws-c-mqtt. * Must be called before using any functionality in aws-c-mqtt. 
diff --git a/include/aws/mqtt/private/v5/mqtt5_utils.h b/include/aws/mqtt/private/v5/mqtt5_utils.h index 3f7bebde..9a05dedf 100644 --- a/include/aws/mqtt/private/v5/mqtt5_utils.h +++ b/include/aws/mqtt/private/v5/mqtt5_utils.h @@ -94,14 +94,6 @@ AWS_EXTERN_C_BEGIN */ AWS_MQTT_API extern struct aws_byte_cursor g_aws_mqtt5_connect_protocol_cursor; -/** - * Validate utf-8 string under mqtt5 specs - * - * @param text - * @return AWS_OP_SUCCESS if the text is validate, otherwise AWS_OP_ERR - */ -AWS_MQTT_API int aws_mqtt5_validate_utf8_text(struct aws_byte_cursor text); - /** * Simple helper function to compute the first byte of an MQTT packet encoding as a function of 4 bit flags * and the packet type. diff --git a/include/aws/mqtt/v5/mqtt5_client.h b/include/aws/mqtt/v5/mqtt5_client.h index 97e1225f..025c03ba 100644 --- a/include/aws/mqtt/v5/mqtt5_client.h +++ b/include/aws/mqtt/v5/mqtt5_client.h @@ -787,18 +787,6 @@ AWS_MQTT_API int aws_mqtt5_negotiated_settings_init( struct aws_mqtt5_negotiated_settings *negotiated_settings, const struct aws_byte_cursor *client_id); -/** - * Makes an owning copy of a negotiated settings structure - * - * @param source settings to copy from - * @param dest settings to copy into. Must be in a zeroed or initialized state because it gets clean up - * called on it as the first step of the copy process. 
- * @return success/failure - */ -AWS_MQTT_API int aws_mqtt5_negotiated_settings_copy( - const struct aws_mqtt5_negotiated_settings *source, - struct aws_mqtt5_negotiated_settings *dest); - /** * Clean up owned memory in negotiated_settings * diff --git a/source/mqtt.c b/source/mqtt.c index 5e6fbe8a..8edc3d23 100644 --- a/source/mqtt.c +++ b/source/mqtt.c @@ -5,8 +5,8 @@ #include +#include #include - #include /******************************************************************************* @@ -289,3 +289,37 @@ void aws_mqtt_fatal_assert_library_initialized(void) { AWS_FATAL_ASSERT(s_mqtt_library_initialized); } } + +/* UTF-8 encoded string validation with respect to [MQTT-1.5.3-2]. */ +static int aws_mqtt_utf8_decoder(uint32_t codepoint, void *user_data) { + (void)user_data; + /* U+0000 - A UTF-8 Encoded String MUST NOT include an encoding of the null character U+0000. [MQTT-1.5.4-2] + * U+0001..U+001F control characters are not valid + */ + if (AWS_UNLIKELY(codepoint <= 0x001F)) { + return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); + } + + /* U+007F..U+009F control characters are not valid */ + if (AWS_UNLIKELY((codepoint >= 0x007F) && (codepoint <= 0x009F))) { + return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); + } + + /* Unicode non-characters are not valid: https://www.unicode.org/faq/private_use.html#nonchar1 */ + if (AWS_UNLIKELY((codepoint & 0x00FFFF) >= 0x00FFFE)) { + return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); + } + if (AWS_UNLIKELY(codepoint >= 0xFDD0 && codepoint <= 0xFDEF)) { + return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); + } + + return AWS_ERROR_SUCCESS; +} + +static struct aws_utf8_decoder_options s_aws_mqtt_utf8_decoder_options = { + .on_codepoint = aws_mqtt_utf8_decoder, +}; + +int aws_mqtt_validate_utf8_text(struct aws_byte_cursor text) { + return aws_decode_utf8(text, &s_aws_mqtt_utf8_decoder_options); +} diff --git a/source/v5/mqtt5_options_storage.c b/source/v5/mqtt5_options_storage.c index 
465ccf1b..10625fa5 100644 --- a/source/v5/mqtt5_options_storage.c +++ b/source/v5/mqtt5_options_storage.c @@ -172,7 +172,7 @@ static int s_aws_mqtt5_user_property_set_validate( return aws_raise_error(AWS_ERROR_MQTT5_USER_PROPERTY_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(property->name)) { + if (aws_mqtt_validate_utf8_text(property->name)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: %s - user property #%zu name not valid UTF8", log_context, log_prefix, i); return aws_raise_error(AWS_ERROR_MQTT5_USER_PROPERTY_VALIDATION); @@ -187,7 +187,7 @@ static int s_aws_mqtt5_user_property_set_validate( property->value.len); return aws_raise_error(AWS_ERROR_MQTT5_USER_PROPERTY_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(property->value)) { + if (aws_mqtt_validate_utf8_text(property->value)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: %s - user property #%zu value not valid UTF8", @@ -332,7 +332,7 @@ int aws_mqtt5_packet_connect_view_validate(const struct aws_mqtt5_packet_connect return aws_raise_error(AWS_ERROR_MQTT5_CONNECT_OPTIONS_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(connect_options->client_id)) { + if (aws_mqtt_validate_utf8_text(connect_options->client_id)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_connect_view - client id not valid UTF-8", @@ -349,7 +349,7 @@ int aws_mqtt5_packet_connect_view_validate(const struct aws_mqtt5_packet_connect return aws_raise_error(AWS_ERROR_MQTT5_CONNECT_OPTIONS_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(*connect_options->username)) { + if (aws_mqtt_validate_utf8_text(*connect_options->username)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_connect_view - username not valid UTF-8", @@ -1259,7 +1259,7 @@ int aws_mqtt5_packet_disconnect_view_validate(const struct aws_mqtt5_packet_disc return aws_raise_error(AWS_ERROR_MQTT5_DISCONNECT_OPTIONS_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(*disconnect_view->reason_string)) { + if 
(aws_mqtt_validate_utf8_text(*disconnect_view->reason_string)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_disconnect_view - reason string not valid UTF-8", @@ -1591,7 +1591,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - missing topic", (void *)publish_view); return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION); - } else if (aws_mqtt5_validate_utf8_text(publish_view->topic)) { + } else if (aws_mqtt_validate_utf8_text(publish_view->topic)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - topic not valid UTF-8", (void *)publish_view); return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION); @@ -1626,7 +1626,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish // Make sure the payload data is UTF-8 if the payload_format set to UTF8 if (*publish_view->payload_format == AWS_MQTT5_PFI_UTF8) { - if (aws_mqtt5_validate_utf8_text(publish_view->payload)) { + if (aws_mqtt_validate_utf8_text(publish_view->payload)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - payload value is not valid UTF-8 while payload format " @@ -1646,7 +1646,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(*publish_view->response_topic)) { + if (aws_mqtt_validate_utf8_text(*publish_view->response_topic)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - response topic not valid UTF-8", @@ -1692,7 +1692,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION); } - if (aws_mqtt5_validate_utf8_text(*publish_view->content_type)) { + if 
(aws_mqtt_validate_utf8_text(*publish_view->content_type)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - content type not valid UTF-8", @@ -2332,7 +2332,7 @@ int aws_mqtt5_packet_unsubscribe_view_validate(const struct aws_mqtt5_packet_uns for (size_t i = 0; i < unsubscribe_view->topic_filter_count; ++i) { const struct aws_byte_cursor *topic_filter = &unsubscribe_view->topic_filters[i]; - if (aws_mqtt5_validate_utf8_text(*topic_filter)) { + if (aws_mqtt_validate_utf8_text(*topic_filter)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_unsubscribe_view - topic filter not valid UTF-8: \"" PRInSTR "\"", @@ -2603,7 +2603,7 @@ static int s_aws_mqtt5_validate_subscription( const struct aws_mqtt5_subscription_view *subscription, void *log_context) { - if (aws_mqtt5_validate_utf8_text(subscription->topic_filter)) { + if (aws_mqtt_validate_utf8_text(subscription->topic_filter)) { AWS_LOGF_ERROR( AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_subscribe_view - topic filter \"" PRInSTR "\" not valid UTF-8 in subscription", diff --git a/source/v5/mqtt5_utils.c b/source/v5/mqtt5_utils.c index 848f7580..d4444203 100644 --- a/source/v5/mqtt5_utils.c +++ b/source/v5/mqtt5_utils.c @@ -7,7 +7,6 @@ #include #include -#include #include uint8_t aws_mqtt5_compute_fixed_header_byte1(enum aws_mqtt5_packet_type packet_type, uint8_t flags) { @@ -124,24 +123,6 @@ int aws_mqtt5_negotiated_settings_init( return AWS_OP_SUCCESS; } -int aws_mqtt5_negotiated_settings_copy( - const struct aws_mqtt5_negotiated_settings *source, - struct aws_mqtt5_negotiated_settings *dest) { - aws_mqtt5_negotiated_settings_clean_up(dest); - - *dest = *source; - AWS_ZERO_STRUCT(dest->client_id_storage); - - if (source->client_id_storage.allocator != NULL) { - return aws_byte_buf_init_copy_from_cursor( - &dest->client_id_storage, - source->client_id_storage.allocator, - aws_byte_cursor_from_buf(&source->client_id_storage)); - } - - return AWS_OP_SUCCESS; -} - int 
aws_mqtt5_negotiated_settings_apply_client_id( struct aws_mqtt5_negotiated_settings *negotiated_settings, const struct aws_byte_cursor *client_id) { @@ -553,37 +534,3 @@ bool aws_mqtt_is_topic_filter_shared_subscription(struct aws_byte_cursor topic_c return true; } - -/* UTF-8 encoded string validation respect to [MQTT-1.5.3-2]. */ -static int aws_mqtt5_utf8_decoder(uint32_t codepoint, void *user_data) { - (void)user_data; - /* U+0000 - A UTF-8 Encoded String MUST NOT include an encoding of the null character U+0000. [MQTT-1.5.4-2] - * U+0001..U+001F control characters are not valid - */ - if (AWS_UNLIKELY(codepoint <= 0x001F)) { - return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); - } - - /* U+007F..U+009F control characters are not valid */ - if (AWS_UNLIKELY((codepoint >= 0x007F) && (codepoint <= 0x009F))) { - return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); - } - - /* Unicode non-characters are not valid: https://www.unicode.org/faq/private_use.html#nonchar1 */ - if (AWS_UNLIKELY((codepoint & 0x00FFFF) >= 0x00FFFE)) { - return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); - } - if (AWS_UNLIKELY(codepoint >= 0xFDD0 && codepoint <= 0xFDEF)) { - return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING); - } - - return AWS_ERROR_SUCCESS; -} - -struct aws_utf8_decoder_options g_aws_mqtt5_utf8_decoder_options = { - .on_codepoint = aws_mqtt5_utf8_decoder, -}; - -int aws_mqtt5_validate_utf8_text(struct aws_byte_cursor text) { - return aws_decode_utf8(text, &g_aws_mqtt5_utf8_decoder_options); -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c5928f19..0ee8847d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,7 +4,7 @@ include(AwsLibFuzzer) enable_testing() file(GLOB TEST_HDRS "v3/*.h v5/*.h") -set(TEST_SRC v3/*.c v5/*.c) +set(TEST_SRC v3/*.c v5/*.c *.c) file(GLOB TESTS ${TEST_HDRS} ${TEST_SRC}) add_test_case(mqtt_packet_puback) @@ -112,7 +112,7 @@ add_test_case(mqtt5_topic_get_segment_count) 
add_test_case(mqtt5_shared_subscription_validation) # utf8 utility -add_test_case(mqtt5_utf8_encoded_string_test) +add_test_case(mqtt_utf8_encoded_string_test) # topic aliasing add_test_case(mqtt5_inbound_topic_alias_register_failure) diff --git a/tests/shared_utils.c b/tests/shared_utils.c new file mode 100644 index 00000000..81ba7dab --- /dev/null +++ b/tests/shared_utils.c @@ -0,0 +1,158 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#include +#include + +#include + +struct utf8_example { + const char *name; + struct aws_byte_cursor text; +}; + +static struct utf8_example s_valid_mqtt_utf8_examples[] = { + { + .name = "1 letter", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("a"), + }, + { + .name = "Several ascii letters", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("ascii word"), + }, + { + .name = "empty string", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL(""), + }, + { + .name = "2 byte codepoint", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xC2\xA3"), + }, + { + .name = "3 byte codepoint", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xE2\x82\xAC"), + }, + { + .name = "4 byte codepoint", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF0\x90\x8D\x88"), + }, + { + .name = "A variety of different length codepoints", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL( + "\xF0\x90\x8D\x88\xE2\x82\xAC\xC2\xA3\x24\xC2\xA3\xE2\x82\xAC\xF0\x90\x8D\x88"), + }, + { + .name = "UTF8 BOM", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBB\xBF"), + }, + { + .name = "UTF8 BOM plus extra", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBB\xBF\x24\xC2\xA3"), + }, + { + .name = "First possible 3 byte codepoint", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xE0\xA0\x80"), + }, + { + .name = "First possible 4 byte codepoint", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF0\x90\x80\x80"), + }, + { + .name = "Last possible 2 byte codepoint", + .text = 
AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xDF\xBF"), + }, + { + .name = "Last valid codepoint before prohibited range U+D800 - U+DFFF", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xED\x9F\xBF"), + }, + { + .name = "Next valid codepoint after prohibited range U+D800 - U+DFFF", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEE\x80\x80"), + }, + { + .name = "Boundary condition", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBF\xBD"), + }, + { + .name = "Boundary condition", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF4\x90\x80\x80"), + }, +}; + +static struct utf8_example s_illegal_mqtt_utf8_examples[] = { + { + .name = "non character U+0000", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x00"), + }, + { + .name = "Codepoint in prohibited range U+0001 - U+001F (in the middle)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x04"), + }, + { + .name = "Codepoint in prohibited range U+0001 - U+001F (boundary)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x1F"), + }, + { + .name = "Codepoint in prohibited range U+007F - U+009F (min: U+7F)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x7F"), + }, + { + .name = "Codepoint in prohibited range U+007F - U+009F (in the middle u+8F)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xC2\x8F"), + }, + { + .name = "Codepoint in prohibited range U+007F - U+009F (boundary U+9F)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xC2\x9F"), + }, + { + .name = "non character end with U+FFFF", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBF\xBF"), + }, + { + .name = "non character end with U+FFFE", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF7\xBF\xBF\xBE"), + }, + { + .name = "non character in U+FDD0 - U+FDEF (lower bound)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xB7\x90"), + }, + { + .name = "non character in U+FDD0 - U+FDEF (in middle)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xB7\xA1"), + }, + { + .name = "non character in U+FDD0 - U+FDEF 
(upper bound)", + .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xB7\xAF"), + }}; + +static int s_mqtt_utf8_encoded_string_test(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + /* Check the valid test cases */ + for (size_t i = 0; i < AWS_ARRAY_SIZE(s_valid_mqtt_utf8_examples); ++i) { + struct utf8_example example = s_valid_mqtt_utf8_examples[i]; + printf("valid example [%zu]: %s\n", i, example.name); + ASSERT_SUCCESS(aws_mqtt_validate_utf8_text(example.text)); + } + + /* Glue all the valid test cases together, they ought to pass */ + struct aws_byte_buf all_good_text; + aws_byte_buf_init(&all_good_text, allocator, 1024); + for (size_t i = 0; i < AWS_ARRAY_SIZE(s_valid_mqtt_utf8_examples); ++i) { + aws_byte_buf_append_dynamic(&all_good_text, &s_valid_mqtt_utf8_examples[i].text); + } + ASSERT_SUCCESS(aws_mqtt_validate_utf8_text(aws_byte_cursor_from_buf(&all_good_text))); + aws_byte_buf_clean_up(&all_good_text); + + /* Check the illegal test cases */ + for (size_t i = 0; i < AWS_ARRAY_SIZE(s_illegal_mqtt_utf8_examples); ++i) { + struct utf8_example example = s_illegal_mqtt_utf8_examples[i]; + printf("illegal example [%zu]: %s\n", i, example.name); + ASSERT_FAILS(aws_mqtt_validate_utf8_text(example.text)); + } + + return AWS_OP_SUCCESS; +} + +AWS_TEST_CASE(mqtt_utf8_encoded_string_test, s_mqtt_utf8_encoded_string_test) \ No newline at end of file diff --git a/tests/v5/mqtt5_utils_tests.c b/tests/v5/mqtt5_utils_tests.c index fd14d027..d2908930 100644 --- a/tests/v5/mqtt5_utils_tests.c +++ b/tests/v5/mqtt5_utils_tests.c @@ -137,152 +137,3 @@ static int s_mqtt5_shared_subscription_validation_fn(struct aws_allocator *alloc } AWS_TEST_CASE(mqtt5_shared_subscription_validation, s_mqtt5_shared_subscription_validation_fn) - -struct utf8_example { - const char *name; - struct aws_byte_cursor text; -}; - -static struct utf8_example s_valid_mqtt5_utf8_examples[] = { - { - .name = "1 letter", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("a"), - }, - { - .name 
= "Several ascii letters", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("ascii word"), - }, - { - .name = "empty string", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL(""), - }, - { - .name = "2 byte codepoint", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xC2\xA3"), - }, - { - .name = "3 byte codepoint", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xE2\x82\xAC"), - }, - { - .name = "4 byte codepoint", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF0\x90\x8D\x88"), - }, - { - .name = "A variety of different length codepoints", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL( - "\xF0\x90\x8D\x88\xE2\x82\xAC\xC2\xA3\x24\xC2\xA3\xE2\x82\xAC\xF0\x90\x8D\x88"), - }, - { - .name = "UTF8 BOM", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBB\xBF"), - }, - { - .name = "UTF8 BOM plus extra", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBB\xBF\x24\xC2\xA3"), - }, - { - .name = "First possible 3 byte codepoint", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xE0\xA0\x80"), - }, - { - .name = "First possible 4 byte codepoint", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF0\x90\x80\x80"), - }, - { - .name = "Last possible 2 byte codepoint", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xDF\xBF"), - }, - { - .name = "Last valid codepoint before prohibited range U+D800 - U+DFFF", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xED\x9F\xBF"), - }, - { - .name = "Next valid codepoint after prohibited range U+D800 - U+DFFF", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEE\x80\x80"), - }, - { - .name = "Boundary condition", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBF\xBD"), - }, - { - .name = "Boundary condition", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF4\x90\x80\x80"), - }, -}; - -static struct utf8_example s_illegal_mqtt5_utf8_examples[] = { - { - .name = "non character U+0000", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x00"), - }, - { - .name = "Codepoint in prohibited range U+0001 - 
U+001F (in the middle)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x04"), - }, - { - .name = "Codepoint in prohibited range U+0001 - U+001F (boundary)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x1F"), - }, - { - .name = "Codepoint in prohibited range U+007F - U+009F (min: U+7F)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\x7F"), - }, - { - .name = "Codepoint in prohibited range U+007F - U+009F (in the middle u+8F)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xC2\x8F"), - }, - { - .name = "Codepoint in prohibited range U+007F - U+009F (boundary U+9F)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xC2\x9F"), - }, - { - .name = "non character end with U+FFFF", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xBF\xBF"), - }, - { - .name = "non character end with U+FFFE", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xF7\xBF\xBF\xBE"), - }, - { - .name = "non character in U+FDD0 - U+FDEF (lower bound)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xB7\x90"), - }, - { - .name = "non character in U+FDD0 - U+FDEF (in middle)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xB7\xA1"), - }, - { - .name = "non character in U+FDD0 - U+FDEF (upper bound)", - .text = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("\xEF\xB7\xAF"), - }}; - -static int s_mqtt5_utf8_encoded_string_test(struct aws_allocator *allocator, void *ctx) { - (void)ctx; - /* Check the valid test cases */ - for (size_t i = 0; i < AWS_ARRAY_SIZE(s_valid_mqtt5_utf8_examples); ++i) { - struct utf8_example example = s_valid_mqtt5_utf8_examples[i]; - printf("valid example [%zu]: %s\n", i, example.name); - ASSERT_SUCCESS(aws_mqtt5_validate_utf8_text(example.text)); - } - - /* Glue all the valid test cases together, they ought to pass */ - struct aws_byte_buf all_good_text; - aws_byte_buf_init(&all_good_text, allocator, 1024); - for (size_t i = 0; i < AWS_ARRAY_SIZE(s_valid_mqtt5_utf8_examples); ++i) { - aws_byte_buf_append_dynamic(&all_good_text, 
&s_valid_mqtt5_utf8_examples[i].text); - } - ASSERT_SUCCESS(aws_mqtt5_validate_utf8_text(aws_byte_cursor_from_buf(&all_good_text))); - aws_byte_buf_clean_up(&all_good_text); - - /* Check the illegal test cases */ - for (size_t i = 0; i < AWS_ARRAY_SIZE(s_illegal_mqtt5_utf8_examples); ++i) { - struct utf8_example example = s_illegal_mqtt5_utf8_examples[i]; - printf("illegal example [%zu]: %s\n", i, example.name); - ASSERT_FAILS(aws_mqtt5_validate_utf8_text(example.text)); - } - - return AWS_OP_SUCCESS; -} - -AWS_TEST_CASE(mqtt5_utf8_encoded_string_test, s_mqtt5_utf8_encoded_string_test)