Skip to content

Commit

Permalink
Allow embedded zeroes (even if it pains me to do so)
Browse files Browse the repository at this point in the history
  • Loading branch information
andy31415 committed Nov 9, 2023
1 parent 967c32b commit 9c301c7
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 16 deletions.
23 changes: 20 additions & 3 deletions src/lib/support/tests/TestUtf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,26 @@ void TestValidStrings(nlTestSuite * inSuite, void * inContext)
NL_TEST_ASSERT(inSuite, IsValidCStringAsUtf8(""));
NL_TEST_ASSERT(inSuite, IsValidCStringAsUtf8("􏿿"));
NL_TEST_ASSERT(inSuite, IsValidCStringAsUtf8("����"));


// NOTE: UTF-8 allows embedded NUL bytes, even though strings that
// contain them are awkward to handle.
// Test that we accept them for now; consider disallowing them
// completely later if the spec is updated to forbid them.
{
char zero[16] = { 0 };
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(zero, 0)));
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(zero, 1)));
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(zero, 2)));
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(zero, 3)));
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(zero, 4)));
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(zero, 16)));
}

{
char insideZero[] = "test\0zero";
NL_TEST_ASSERT(inSuite, Utf8::IsValid(CharSpan(insideZero)));
}
}

#define TEST_INVALID_BYTES(...) \
Expand All @@ -78,9 +98,6 @@ void TestValidStrings(nlTestSuite * inSuite, void * inContext)

void TestInvalidStrings(nlTestSuite * inSuite, void * inContext)
{
// cannot embed zeroes
TEST_INVALID_BYTES(0x00);

// overly long representation
TEST_INVALID_BYTES(0xe0, 0b1001'1111, 0x80); // A
TEST_INVALID_BYTES(0xed, 0b1011'0000, 0x80); // B
Expand Down
10 changes: 0 additions & 10 deletions src/lib/support/utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,6 @@ enum class ParserState

ParserState NextState(ParserState state, uint8_t value)
{
if (value == 0)
{
// Refuse to have embedded 0s
//
// NOTE: this is NOT a UTF-8 requirement but a chip encoding
// requirement for TLV. We have this here to adhere to the
// exposed (non-anon-namespace) API.
return ParserState::kInvalid;
}

switch (state)
{
case ParserState::kFirstByte:
Expand Down
4 changes: 1 addition & 3 deletions src/lib/support/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ namespace chip {
namespace Utf8 {

/**
* Validate that the given span looks like a valid UTF-8 string:
* - no embedded nulls (this is a Matter Spec requirement for encoding)
* - valid UTF8 encoding overall
* Validate that the given span looks like a valid UTF-8 string.
* Embedded NUL bytes are accepted, since UTF-8 itself permits them.
*
* UTF-8 encoding described at
* https://www.unicode.org/versions/Unicode12.0.0/UnicodeStandard-12.0.pdf
Expand Down

0 comments on commit 9c301c7

Please sign in to comment.