Skip to content

Commit

Permalink
Part 1 - refactor utf8 utilities to shared
Browse files Browse the repository at this point in the history
  • Loading branch information
Bret Ambrose committed Aug 23, 2023
1 parent 2529907 commit 26eea2f
Show file tree
Hide file tree
Showing 9 changed files with 215 additions and 236 deletions.
9 changes: 9 additions & 0 deletions include/aws/mqtt/mqtt.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,18 @@ AWS_EXTERN_C_BEGIN

AWS_MQTT_API
bool aws_mqtt_is_valid_topic(const struct aws_byte_cursor *topic);

AWS_MQTT_API
bool aws_mqtt_is_valid_topic_filter(const struct aws_byte_cursor *topic_filter);

/**
* Validate utf-8 string under mqtt specs
*
* @param text
* @return AWS_OP_SUCCESS if the text is valid, otherwise AWS_OP_ERR
*/
AWS_MQTT_API int aws_mqtt_validate_utf8_text(struct aws_byte_cursor text);

/**
* Initializes internal datastructures used by aws-c-mqtt.
* Must be called before using any functionality in aws-c-mqtt.
Expand Down
8 changes: 0 additions & 8 deletions include/aws/mqtt/private/v5/mqtt5_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,6 @@ AWS_EXTERN_C_BEGIN
*/
AWS_MQTT_API extern struct aws_byte_cursor g_aws_mqtt5_connect_protocol_cursor;

/**
* Validate utf-8 string under mqtt5 specs
*
* @param text
* @return AWS_OP_SUCCESS if the text is valid, otherwise AWS_OP_ERR
*/
AWS_MQTT_API int aws_mqtt5_validate_utf8_text(struct aws_byte_cursor text);

/**
* Simple helper function to compute the first byte of an MQTT packet encoding as a function of 4 bit flags
* and the packet type.
Expand Down
12 changes: 0 additions & 12 deletions include/aws/mqtt/v5/mqtt5_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -787,18 +787,6 @@ AWS_MQTT_API int aws_mqtt5_negotiated_settings_init(
struct aws_mqtt5_negotiated_settings *negotiated_settings,
const struct aws_byte_cursor *client_id);

/**
* Makes an owning copy of a negotiated settings structure
*
* @param source settings to copy from
* @param dest settings to copy into. Must be in a zeroed or initialized state because it gets clean up
* called on it as the first step of the copy process.
* @return success/failure
*/
AWS_MQTT_API int aws_mqtt5_negotiated_settings_copy(
const struct aws_mqtt5_negotiated_settings *source,
struct aws_mqtt5_negotiated_settings *dest);

/**
* Clean up owned memory in negotiated_settings
*
Expand Down
36 changes: 35 additions & 1 deletion source/mqtt.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

#include <aws/mqtt/mqtt.h>

#include <aws/common/encoding.h>
#include <aws/http/http.h>

#include <aws/io/logging.h>

/*******************************************************************************
Expand Down Expand Up @@ -289,3 +289,37 @@ void aws_mqtt_fatal_assert_library_initialized(void) {
AWS_FATAL_ASSERT(s_mqtt_library_initialized);
}
}

/*
 * Per-codepoint callback used for UTF-8 encoded string validation with respect to [MQTT-1.5.3-2].
 *
 * @param codepoint decoded Unicode codepoint to check
 * @param user_data unused
 * @return AWS_ERROR_SUCCESS when the codepoint is acceptable; otherwise the result of
 *         raising AWS_ERROR_MQTT5_INVALID_UTF8_STRING
 */
static int aws_mqtt_utf8_decoder(uint32_t codepoint, void *user_data) {
    (void)user_data;

    /* U+0000 - A UTF-8 Encoded String MUST NOT include an encoding of the null character U+0000. [MQTT-1.5.4-2]
     * U+0001..U+001F control characters are rejected as well.
     */
    const int is_null_or_c0_control = codepoint <= 0x001F;

    /* U+007F..U+009F control characters are rejected */
    const int is_del_or_c1_control = (codepoint >= 0x007F) && (codepoint <= 0x009F);

    /* Unicode non-characters are rejected: https://www.unicode.org/faq/private_use.html#nonchar1
     * The mask keeps the low 16 bits, so this matches xxFFFE and xxFFFF regardless of the higher bits.
     */
    const int is_trailing_noncharacter = (codepoint & 0x00FFFF) >= 0x00FFFE;
    const int is_fdd0_block_noncharacter = (codepoint >= 0xFDD0) && (codepoint <= 0xFDEF);

    if (AWS_UNLIKELY(
            is_null_or_c0_control || is_del_or_c1_control || is_trailing_noncharacter ||
            is_fdd0_block_noncharacter)) {
        return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING);
    }

    return AWS_ERROR_SUCCESS;
}

/*
 * File-local decoder options handed to aws_decode_utf8() by aws_mqtt_validate_utf8_text().
 * Only the on_codepoint callback is set; it points at aws_mqtt_utf8_decoder, which applies
 * the MQTT codepoint restrictions. Presumably the decoder invokes it once per decoded
 * codepoint - confirm against the aws-c-common encoding API.
 */
static struct aws_utf8_decoder_options s_aws_mqtt_utf8_decoder_options = {
.on_codepoint = aws_mqtt_utf8_decoder,
};

/*
 * Validates `text` by running it through aws_decode_utf8() with the MQTT-specific
 * decoder options defined in this file.
 *
 * @param text bytes to validate
 * @return the result of aws_decode_utf8() for `text`
 */
int aws_mqtt_validate_utf8_text(struct aws_byte_cursor text) {
    const int decode_result = aws_decode_utf8(text, &s_aws_mqtt_utf8_decoder_options);

    return decode_result;
}
22 changes: 11 additions & 11 deletions source/v5/mqtt5_options_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ static int s_aws_mqtt5_user_property_set_validate(
return aws_raise_error(AWS_ERROR_MQTT5_USER_PROPERTY_VALIDATION);
}

if (aws_mqtt5_validate_utf8_text(property->name)) {
if (aws_mqtt_validate_utf8_text(property->name)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL, "id=%p: %s - user property #%zu name not valid UTF8", log_context, log_prefix, i);
return aws_raise_error(AWS_ERROR_MQTT5_USER_PROPERTY_VALIDATION);
Expand All @@ -187,7 +187,7 @@ static int s_aws_mqtt5_user_property_set_validate(
property->value.len);
return aws_raise_error(AWS_ERROR_MQTT5_USER_PROPERTY_VALIDATION);
}
if (aws_mqtt5_validate_utf8_text(property->value)) {
if (aws_mqtt_validate_utf8_text(property->value)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: %s - user property #%zu value not valid UTF8",
Expand Down Expand Up @@ -332,7 +332,7 @@ int aws_mqtt5_packet_connect_view_validate(const struct aws_mqtt5_packet_connect
return aws_raise_error(AWS_ERROR_MQTT5_CONNECT_OPTIONS_VALIDATION);
}

if (aws_mqtt5_validate_utf8_text(connect_options->client_id)) {
if (aws_mqtt_validate_utf8_text(connect_options->client_id)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_connect_view - client id not valid UTF-8",
Expand All @@ -349,7 +349,7 @@ int aws_mqtt5_packet_connect_view_validate(const struct aws_mqtt5_packet_connect
return aws_raise_error(AWS_ERROR_MQTT5_CONNECT_OPTIONS_VALIDATION);
}

if (aws_mqtt5_validate_utf8_text(*connect_options->username)) {
if (aws_mqtt_validate_utf8_text(*connect_options->username)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_connect_view - username not valid UTF-8",
Expand Down Expand Up @@ -1259,7 +1259,7 @@ int aws_mqtt5_packet_disconnect_view_validate(const struct aws_mqtt5_packet_disc
return aws_raise_error(AWS_ERROR_MQTT5_DISCONNECT_OPTIONS_VALIDATION);
}

if (aws_mqtt5_validate_utf8_text(*disconnect_view->reason_string)) {
if (aws_mqtt_validate_utf8_text(*disconnect_view->reason_string)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_disconnect_view - reason string not valid UTF-8",
Expand Down Expand Up @@ -1591,7 +1591,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - missing topic", (void *)publish_view);
return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION);
} else if (aws_mqtt5_validate_utf8_text(publish_view->topic)) {
} else if (aws_mqtt_validate_utf8_text(publish_view->topic)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL, "id=%p: aws_mqtt5_packet_publish_view - topic not valid UTF-8", (void *)publish_view);
return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION);
Expand Down Expand Up @@ -1626,7 +1626,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish

// Make sure the payload data is UTF-8 if the payload_format set to UTF8
if (*publish_view->payload_format == AWS_MQTT5_PFI_UTF8) {
if (aws_mqtt5_validate_utf8_text(publish_view->payload)) {
if (aws_mqtt_validate_utf8_text(publish_view->payload)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_publish_view - payload value is not valid UTF-8 while payload format "
Expand All @@ -1646,7 +1646,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish
return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION);
}

if (aws_mqtt5_validate_utf8_text(*publish_view->response_topic)) {
if (aws_mqtt_validate_utf8_text(*publish_view->response_topic)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_publish_view - response topic not valid UTF-8",
Expand Down Expand Up @@ -1692,7 +1692,7 @@ int aws_mqtt5_packet_publish_view_validate(const struct aws_mqtt5_packet_publish
return aws_raise_error(AWS_ERROR_MQTT5_PUBLISH_OPTIONS_VALIDATION);
}

if (aws_mqtt5_validate_utf8_text(*publish_view->content_type)) {
if (aws_mqtt_validate_utf8_text(*publish_view->content_type)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_publish_view - content type not valid UTF-8",
Expand Down Expand Up @@ -2332,7 +2332,7 @@ int aws_mqtt5_packet_unsubscribe_view_validate(const struct aws_mqtt5_packet_uns

for (size_t i = 0; i < unsubscribe_view->topic_filter_count; ++i) {
const struct aws_byte_cursor *topic_filter = &unsubscribe_view->topic_filters[i];
if (aws_mqtt5_validate_utf8_text(*topic_filter)) {
if (aws_mqtt_validate_utf8_text(*topic_filter)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_unsubscribe_view - topic filter not valid UTF-8: \"" PRInSTR "\"",
Expand Down Expand Up @@ -2603,7 +2603,7 @@ static int s_aws_mqtt5_validate_subscription(
const struct aws_mqtt5_subscription_view *subscription,
void *log_context) {

if (aws_mqtt5_validate_utf8_text(subscription->topic_filter)) {
if (aws_mqtt_validate_utf8_text(subscription->topic_filter)) {
AWS_LOGF_ERROR(
AWS_LS_MQTT5_GENERAL,
"id=%p: aws_mqtt5_packet_subscribe_view - topic filter \"" PRInSTR "\" not valid UTF-8 in subscription",
Expand Down
53 changes: 0 additions & 53 deletions source/v5/mqtt5_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

#include <aws/common/byte_buf.h>
#include <aws/common/device_random.h>
#include <aws/common/encoding.h>
#include <inttypes.h>

uint8_t aws_mqtt5_compute_fixed_header_byte1(enum aws_mqtt5_packet_type packet_type, uint8_t flags) {
Expand Down Expand Up @@ -124,24 +123,6 @@ int aws_mqtt5_negotiated_settings_init(
return AWS_OP_SUCCESS;
}

int aws_mqtt5_negotiated_settings_copy(
const struct aws_mqtt5_negotiated_settings *source,
struct aws_mqtt5_negotiated_settings *dest) {
aws_mqtt5_negotiated_settings_clean_up(dest);

*dest = *source;
AWS_ZERO_STRUCT(dest->client_id_storage);

if (source->client_id_storage.allocator != NULL) {
return aws_byte_buf_init_copy_from_cursor(
&dest->client_id_storage,
source->client_id_storage.allocator,
aws_byte_cursor_from_buf(&source->client_id_storage));
}

return AWS_OP_SUCCESS;
}

int aws_mqtt5_negotiated_settings_apply_client_id(
struct aws_mqtt5_negotiated_settings *negotiated_settings,
const struct aws_byte_cursor *client_id) {
Expand Down Expand Up @@ -553,37 +534,3 @@ bool aws_mqtt_is_topic_filter_shared_subscription(struct aws_byte_cursor topic_c

return true;
}

/*
 * Per-codepoint callback used for UTF-8 encoded string validation with respect to [MQTT-1.5.3-2].
 *
 * @param codepoint decoded Unicode codepoint to check
 * @param user_data unused
 * @return AWS_ERROR_SUCCESS when the codepoint is acceptable; otherwise the result of
 *         raising AWS_ERROR_MQTT5_INVALID_UTF8_STRING
 */
static int aws_mqtt5_utf8_decoder(uint32_t codepoint, void *user_data) {
    (void)user_data;

    /* U+0000 - A UTF-8 Encoded String MUST NOT include an encoding of the null character U+0000. [MQTT-1.5.4-2]
     * U+0001..U+001F control characters are rejected as well.
     */
    const int is_null_or_c0_control = codepoint <= 0x001F;

    /* U+007F..U+009F control characters are rejected */
    const int is_del_or_c1_control = (codepoint >= 0x007F) && (codepoint <= 0x009F);

    /* Unicode non-characters are rejected: https://www.unicode.org/faq/private_use.html#nonchar1
     * The mask keeps the low 16 bits, so this matches xxFFFE and xxFFFF regardless of the higher bits.
     */
    const int is_trailing_noncharacter = (codepoint & 0x00FFFF) >= 0x00FFFE;
    const int is_fdd0_block_noncharacter = (codepoint >= 0xFDD0) && (codepoint <= 0xFDEF);

    if (AWS_UNLIKELY(
            is_null_or_c0_control || is_del_or_c1_control || is_trailing_noncharacter ||
            is_fdd0_block_noncharacter)) {
        return aws_raise_error(AWS_ERROR_MQTT5_INVALID_UTF8_STRING);
    }

    return AWS_ERROR_SUCCESS;
}

/*
 * Decoder options handed to aws_decode_utf8() by aws_mqtt5_validate_utf8_text().
 * Only the on_codepoint callback is set; it points at aws_mqtt5_utf8_decoder, which applies
 * the MQTT codepoint restrictions.
 * NOTE(review): this object has external linkage (no `static`); confirm whether anything
 * outside this file refers to it.
 */
struct aws_utf8_decoder_options g_aws_mqtt5_utf8_decoder_options = {
.on_codepoint = aws_mqtt5_utf8_decoder,
};

/*
 * Validates `text` by running it through aws_decode_utf8() with the MQTT5-specific
 * decoder options defined in this file.
 *
 * @param text bytes to validate
 * @return the result of aws_decode_utf8() for `text`
 */
int aws_mqtt5_validate_utf8_text(struct aws_byte_cursor text) {
    const int decode_result = aws_decode_utf8(text, &g_aws_mqtt5_utf8_decoder_options);

    return decode_result;
}
4 changes: 2 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include(AwsLibFuzzer)
enable_testing()

file(GLOB TEST_HDRS "v3/*.h v5/*.h")
set(TEST_SRC v3/*.c v5/*.c)
set(TEST_SRC v3/*.c v5/*.c *.c)
file(GLOB TESTS ${TEST_HDRS} ${TEST_SRC})

add_test_case(mqtt_packet_puback)
Expand Down Expand Up @@ -112,7 +112,7 @@ add_test_case(mqtt5_topic_get_segment_count)
add_test_case(mqtt5_shared_subscription_validation)

# utf8 utility
add_test_case(mqtt5_utf8_encoded_string_test)
add_test_case(mqtt_utf8_encoded_string_test)

# topic aliasing
add_test_case(mqtt5_inbound_topic_alias_register_failure)
Expand Down
Loading

0 comments on commit 26eea2f

Please sign in to comment.