Skip to content

Commit

Permalink
Extra tests for assembly name parser. (#64022)
Browse files Browse the repository at this point in the history
* Remove dead code in native assembly name parsing

* disallow `\u` escaping in assembly names

* misc cleanup

* forward slash is illegal, whether escaped or not

* ignore "language" attribute in assembly name ("culture" must be used)

* duplicate attributes are ok if unrecognized (just add tests)

* drop support for "custom" blob attribute

* drop support for publickey[token]=neutral ("null" must be used)

* ignore unknown assembly name attributes in mono (compat)

* disallow \0 anywhere in the assembly name

* disallow \0 in assembly names on mono (compat)

* only check for embedded nulls when parsing

* fix mono build

* make GCC happy

* add a couple of test scenarios for publickey vs. publickeytoken (the CoreRT parser might trip on these)

* produce errors on duplicate known attributes in mono
  • Loading branch information
VSadov authored Jan 22, 2022
1 parent feb25b0 commit 8d2268a
Show file tree
Hide file tree
Showing 11 changed files with 137 additions and 238 deletions.
5 changes: 0 additions & 5 deletions src/coreclr/binder/inc/assemblyidentity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,9 @@ namespace BINDER_SPACE
IDENTITY_FLAG_PUBLIC_KEY_TOKEN = 0x004,
IDENTITY_FLAG_PUBLIC_KEY = 0x008,
IDENTITY_FLAG_CULTURE = 0x010,
IDENTITY_FLAG_LANGUAGE = 0x020,
IDENTITY_FLAG_PROCESSOR_ARCHITECTURE = 0x040,
IDENTITY_FLAG_RETARGETABLE = 0x080,
IDENTITY_FLAG_PUBLIC_KEY_TOKEN_NULL = 0x100,
IDENTITY_FLAG_CUSTOM = 0x200,
IDENTITY_FLAG_CUSTOM_NULL = 0x400,
IDENTITY_FLAG_CONTENT_TYPE = 0x800,
IDENTITY_FLAG_FULL_NAME = (IDENTITY_FLAG_SIMPLE_NAME |
IDENTITY_FLAG_VERSION)
Expand All @@ -50,7 +47,6 @@ namespace BINDER_SPACE
// Need to pre-populate SBuffers because of bogus asserts
static const BYTE byteArr[] = { 0 };
m_publicKeyOrTokenBLOB.SetImmutable(byteArr, sizeof(byteArr));
m_customBLOB.SetImmutable(byteArr, sizeof(byteArr));
}
~AssemblyIdentity()
{
Expand Down Expand Up @@ -83,7 +79,6 @@ namespace BINDER_SPACE
SBuffer m_publicKeyOrTokenBLOB;
PEKIND m_kProcessorArchitecture;
AssemblyContentType m_kContentType;
SBuffer m_customBLOB;
DWORD m_dwIdentityFlags;
};

Expand Down
16 changes: 5 additions & 11 deletions src/coreclr/binder/inc/stringlexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,32 +55,28 @@ namespace BINDER_SPACE
inline StringLexer();
inline ~StringLexer();

inline void Init(SString &inputString, BOOL fSupportEscaping);
inline void Init(SString &inputString);

static inline BOOL IsWhitespace(WCHAR wcChar);
static inline BOOL IsEOS(WCHAR wcChar);
static inline BOOL IsQuoteCharacter(WCHAR wcChar);

virtual BOOL IsSeparatorChar(WCHAR wcChar) = NULL;
virtual LEXEME_TYPE GetLexemeType(WCHAR wcChar) = NULL;
BOOL IsSeparatorChar(WCHAR wcChar);
LEXEME_TYPE GetLexemeType(WCHAR wcChar);

protected:
static const WCHAR INVALID_CHARACTER = -1;

LEXEME_TYPE GetNextLexeme(SString &currentString, BOOL fPermitUnescapedQuotes = FALSE);
LEXEME_TYPE GetNextLexeme(SString &currentString);

inline WCHAR PopCharacter(BOOL *pfIsEscaped);
inline void PushCharacter(WCHAR wcCurrentChar,
BOOL fIsEscaped);

inline WCHAR GetRawCharacter();
inline void PushRawCharacter();
inline WCHAR DecodeUTF16Character();
inline WCHAR GetNextCharacter(BOOL *pfIsEscaped);

inline WCHAR ParseUnicode();
LEXEME_TYPE ParseString(SString &currentString,
BOOL fPermitUnescapeQuotes);
LEXEME_TYPE ParseString(SString &currentString);

void TrimTrailingWhiteSpaces(SString &currentString);

Expand All @@ -89,8 +85,6 @@ namespace BINDER_SPACE

WCHAR m_wcCurrentChar;
BOOL m_fCurrentCharIsEscaped;
BOOL m_fSupportEscaping;
BOOL m_fReadRawCharacter;
};

#include "stringlexer.inl"
Expand Down
164 changes: 27 additions & 137 deletions src/coreclr/binder/inc/stringlexer.inl
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,10 @@ StringLexer::~StringLexer()
// Nothing to do here
}

void StringLexer::Init(SString &inputString, BOOL fSupportEscaping)
void StringLexer::Init(SString &inputString)
{
m_cursor = inputString.Begin();
m_end = inputString.End();
m_fSupportEscaping = fSupportEscaping;
m_fReadRawCharacter = FALSE;
}

BOOL StringLexer::IsWhitespace(WCHAR wcChar)
Expand All @@ -55,6 +53,7 @@ WCHAR StringLexer::PopCharacter(BOOL *pfIsEscaped)
{
m_wcCurrentChar = INVALID_CHARACTER;
*pfIsEscaped = m_fCurrentCharIsEscaped;
m_cursor++;
}
else
{
Expand All @@ -71,172 +70,63 @@ void StringLexer::PushCharacter(WCHAR wcCurrentChar,

m_wcCurrentChar = wcCurrentChar;
m_fCurrentCharIsEscaped = fIsEscaped;
m_cursor--;
}

WCHAR StringLexer::GetRawCharacter()
{
WCHAR wcCurrentChar = 0;

if (m_cursor <= m_end)
if (m_cursor < m_end)
{
wcCurrentChar = m_cursor[0];
m_fReadRawCharacter = TRUE;
m_cursor++;
}
else
{
m_fReadRawCharacter = FALSE;
}

return wcCurrentChar;
}

void StringLexer::PushRawCharacter()
{
if (m_fReadRawCharacter)
{
m_cursor--;
m_fReadRawCharacter = FALSE;
}
}

WCHAR StringLexer::DecodeUTF16Character()
{
// See http://www.ietf.org/rfc/rfc2781.txt for details on UTF-16 encoding.

WCHAR wcCurrentChar = 0;
SCOUNT_T nCharacters = m_end - m_cursor + 1;
WCHAR wcChar1 = GetRawCharacter();

if (wcChar1 < 0xd800)
{
wcCurrentChar = wcChar1;
// do not allow \0 anywhere in the string.
if (wcCurrentChar == 0)
{
wcCurrentChar = INVALID_CHARACTER;
}
}
else
{
// StringLexer is not designed to handle UTF-16 characters beyond the Basic Multilingual Plane,
// since it stores all characters in 16-bit WCHARs.
// However, since the vast majority of the time, we (Microsoft) produce the manifests,
// this is likely a non-scenario, as the other Unicode planes would never be used in practice.

if (wcChar1 <= 0xdbff) // 0xd800 - 0xdbff indicates the first WCHAR of a surrogate pair
{
if (nCharacters >= 2)
{
GetRawCharacter(); // Skip the second WCHAR of the surrogate pair
}
}
// Otherwise, the character is either in the 0xdc00 - 0xdfff range, indicating the second WCHAR of a surrogate pair,
// or in the 0xE000 - 0xFFFF range, which has within it ranges of invalid characters, and which we conservatively treat
// as invalid.

wcCurrentChar = INVALID_CHARACTER;
// EOS
wcCurrentChar = 0;
}

return wcCurrentChar;
}


WCHAR StringLexer::GetNextCharacter(BOOL *pfIsEscaped)
{
*pfIsEscaped = FALSE;

WCHAR wcCurrentChar = GetRawCharacter(); // DecodeUTF16Character()
WCHAR wcCurrentChar = GetRawCharacter();
if (wcCurrentChar == L'\\')
{
WCHAR wcTempChar = GetRawCharacter(); // DecodeUTF16Character()
WCHAR wcTempChar = GetRawCharacter();

if (m_fSupportEscaping)
{
// Handle standard escapes
switch (wcTempChar)
{
case L'"':
case L'\'':
case L',':
case L'\\':
case L'/':
case L'=':
break;
case L't':
wcTempChar = 9;
break;
case L'n':
wcTempChar = 10;
break;
case L'r':
wcTempChar = 13;
break;
case L'u':
wcTempChar = ParseUnicode();
break;
default:
return INVALID_CHARACTER;
}

*pfIsEscaped = TRUE;
wcCurrentChar = wcTempChar;
}
else
{
// Do not handle escapes except for quotes
switch (wcTempChar)
{
case L'"':
case L'\'':
*pfIsEscaped = TRUE;
wcCurrentChar = wcTempChar;
break;
default:
PushRawCharacter();
break;
}
}
}

return wcCurrentChar;
}

WCHAR StringLexer::ParseUnicode()
{
int nCharacters = 0;
WCHAR wcUnicodeChar = 0;

for(;;)
{
WCHAR wcCurrentChar = DecodeUTF16Character();
nCharacters++;

if (wcCurrentChar == L';')
// Handle standard escapes
switch (wcTempChar)
{
case L'"':
case L'\'':
case L',':
case L'\\':
case L'=':
case L't':
case L'n':
case L'r':
break;
}
else if ((wcCurrentChar == INVALID_CHARACTER) || (nCharacters >= 9))
{
default:
return INVALID_CHARACTER;
}

wcUnicodeChar <<= 4;

if ((wcCurrentChar >= L'0') && (wcCurrentChar <= L'9'))
{
wcUnicodeChar += (wcCurrentChar - L'0');
}
else if ((wcCurrentChar >= L'a') && (wcCurrentChar <= L'f'))
{
wcUnicodeChar += (wcCurrentChar - L'a') + 10;
}
else if ((wcCurrentChar >= L'A') && (wcCurrentChar <= L'F'))
{
wcUnicodeChar += (wcCurrentChar - L'A') + 10;
}
else
{
return INVALID_CHARACTER;
}
*pfIsEscaped = TRUE;
wcCurrentChar = wcTempChar;
}

return wcUnicodeChar;
return wcCurrentChar;
}

#endif
11 changes: 4 additions & 7 deletions src/coreclr/binder/inc/textualidentityparser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,9 @@ namespace BINDER_SPACE
TextualIdentityParser(AssemblyIdentity *pAssemblyIdentity);
~TextualIdentityParser();

virtual BOOL IsSeparatorChar(WCHAR wcChar);
virtual StringLexer::LEXEME_TYPE GetLexemeType(WCHAR wcChar);

static HRESULT Parse(/* in */ SString &textualIdentity,
/* out */ AssemblyIdentity *pAssemblyIdentity,
/* in */ BOOL fPermitUnescapedQuotes = FALSE);
/* out */ AssemblyIdentity *pAssemblyIdentity);

static HRESULT ToString(/* in */ AssemblyIdentity *pAssemblyIdentity,
/* in */ DWORD dwIdentityFlags,
/* out */ SString &textualIdentity);
Expand All @@ -45,15 +42,15 @@ namespace BINDER_SPACE
/* in */ BOOL fValidateHex,
/* in */ BOOL fIsToken,
/* out */ SBuffer &publicKeyOrTokenBLOB);

static void BlobToHex(/* in */ SBuffer &publicKeyOrTokenBLOB,
/* out */ SString &publicKeyOrToken);

BOOL ParseString(/* in */ SString &textualString,
/* out */ SString &contentString);

protected:
BOOL Parse(/* in */ SString &textualIdentity,
/* in */ BOOL fPermitUnescapedQuotes = FALSE);
BOOL Parse(/* in */ SString &textualIdentity);

BOOL PopulateAssemblyIdentity(/* in */ SString &attributeString,
/* in */ SString &valueString);
Expand Down
28 changes: 24 additions & 4 deletions src/coreclr/binder/stringlexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace BINDER_SPACE
{
StringLexer::LEXEME_TYPE
StringLexer::GetNextLexeme(SString &currentString, BOOL fPermitUnescapedQuotes)
StringLexer::GetNextLexeme(SString &currentString)
{
BOOL fIsEscaped = FALSE;
WCHAR wcCurrentChar = INVALID_CHARACTER;
Expand All @@ -43,11 +43,11 @@ namespace BINDER_SPACE

// First character of string lexeme; push it back
PushCharacter(wcCurrentChar, fIsEscaped);
return ParseString(currentString, fPermitUnescapedQuotes);
return ParseString(currentString);
}

StringLexer::LEXEME_TYPE
StringLexer::ParseString(SString &currentString, BOOL fPermitUnescapedQuotes)
StringLexer::ParseString(SString &currentString)
{
BOOL fIsFirstCharacter = TRUE;
WCHAR wcCurrentChar = INVALID_CHARACTER;
Expand Down Expand Up @@ -99,7 +99,7 @@ namespace BINDER_SPACE
break;
}

if (!fPermitUnescapedQuotes && !fIsEscaped && IsQuoteCharacter(wcCurrentChar) && !IsQuoteCharacter(wcOpeningQuote))
if (!fIsEscaped && IsQuoteCharacter(wcCurrentChar) && !IsQuoteCharacter(wcOpeningQuote))
{
// Unescaped quotes in the middle of the string are an error
return LEXEME_TYPE_INVALID;
Expand Down Expand Up @@ -147,4 +147,24 @@ namespace BINDER_SPACE
currentString.Truncate(cursor + 1);
}
}

// Reports whether wcChar is one of the two separator characters
// recognized by the lexer: a comma (',') or an equals sign ('=').
// Returns non-zero for a separator and zero for any other character.
BOOL StringLexer::IsSeparatorChar(WCHAR wcChar)
{
    const bool fIsComma = (wcChar == W(','));
    const bool fIsEquals = (wcChar == W('='));

    return (fIsComma || fIsEquals);
}

// Classifies a single character into the lexeme type it denotes:
//   '='            -> LEXEME_TYPE_EQUALS
//   ','            -> LEXEME_TYPE_COMMA
//   0 (terminator) -> LEXEME_TYPE_END_OF_STREAM
//   anything else  -> LEXEME_TYPE_STRING
StringLexer::LEXEME_TYPE StringLexer::GetLexemeType(WCHAR wcChar)
{
    if (wcChar == W('='))
    {
        return LEXEME_TYPE_EQUALS;
    }

    if (wcChar == W(','))
    {
        return LEXEME_TYPE_COMMA;
    }

    if (wcChar == 0)
    {
        return LEXEME_TYPE_END_OF_STREAM;
    }

    // Every other character is treated as part of a string lexeme.
    return LEXEME_TYPE_STRING;
}
};
Loading

0 comments on commit 8d2268a

Please sign in to comment.