dotnet · VSadov · Jan 22, 2022 · Jan 19, 2022 · Jan 21, 2022 · Jan 21, 2022
diff --git a/src/coreclr/binder/inc/assemblyidentity.hpp b/src/coreclr/binder/inc/assemblyidentity.hpp
@@ -30,12 +30,9 @@ namespace BINDER_SPACE
  IDENTITY_FLAG_PUBLIC_KEY_TOKEN = 0x004,
  IDENTITY_FLAG_PUBLIC_KEY = 0x008,
  IDENTITY_FLAG_CULTURE = 0x010,
- IDENTITY_FLAG_LANGUAGE = 0x020,
  IDENTITY_FLAG_PROCESSOR_ARCHITECTURE = 0x040,
  IDENTITY_FLAG_RETARGETABLE = 0x080,
  IDENTITY_FLAG_PUBLIC_KEY_TOKEN_NULL = 0x100,
- IDENTITY_FLAG_CUSTOM = 0x200,
- IDENTITY_FLAG_CUSTOM_NULL = 0x400,
  IDENTITY_FLAG_CONTENT_TYPE = 0x800,
  IDENTITY_FLAG_FULL_NAME = (IDENTITY_FLAG_SIMPLE_NAME |
  IDENTITY_FLAG_VERSION)
@@ -50,7 +47,6 @@ namespace BINDER_SPACE
  // Need to pre-populate SBuffers because of bogus asserts
  static const BYTE byteArr[] = { 0 };
  m_publicKeyOrTokenBLOB.SetImmutable(byteArr, sizeof(byteArr));
- m_customBLOB.SetImmutable(byteArr, sizeof(byteArr));
  }
  ~AssemblyIdentity()
  {
@@ -83,7 +79,6 @@ namespace BINDER_SPACE
  SBuffer m_publicKeyOrTokenBLOB;
  PEKIND m_kProcessorArchitecture;
  AssemblyContentType m_kContentType;
- SBuffer m_customBLOB;
  DWORD m_dwIdentityFlags;
  };
 

diff --git a/src/coreclr/binder/inc/stringlexer.hpp b/src/coreclr/binder/inc/stringlexer.hpp
@@ -55,32 +55,28 @@ namespace BINDER_SPACE
  inline StringLexer();
  inline ~StringLexer();
 
- inline void Init(SString &inputString, BOOL fSupportEscaping);
+ inline void Init(SString &inputString);
 
  static inline BOOL IsWhitespace(WCHAR wcChar);
  static inline BOOL IsEOS(WCHAR wcChar);
  static inline BOOL IsQuoteCharacter(WCHAR wcChar);
 
- virtual BOOL IsSeparatorChar(WCHAR wcChar) = NULL;
- virtual LEXEME_TYPE GetLexemeType(WCHAR wcChar) = NULL;
+ BOOL IsSeparatorChar(WCHAR wcChar);
+ LEXEME_TYPE GetLexemeType(WCHAR wcChar);
 
  protected:
  static const WCHAR INVALID_CHARACTER = -1;
 
- LEXEME_TYPE GetNextLexeme(SString &currentString, BOOL fPermitUnescapedQuotes = FALSE);
+ LEXEME_TYPE GetNextLexeme(SString &currentString);
 
  inline WCHAR PopCharacter(BOOL *pfIsEscaped);
  inline void PushCharacter(WCHAR wcCurrentChar,
  BOOL fIsEscaped);
 
  inline WCHAR GetRawCharacter();
- inline void PushRawCharacter();
- inline WCHAR DecodeUTF16Character();
  inline WCHAR GetNextCharacter(BOOL *pfIsEscaped);
 
- inline WCHAR ParseUnicode();
- LEXEME_TYPE ParseString(SString &currentString,
- BOOL fPermitUnescapeQuotes);
+ LEXEME_TYPE ParseString(SString &currentString);
 
  void TrimTrailingWhiteSpaces(SString &currentString);
 
@@ -89,8 +85,6 @@ namespace BINDER_SPACE
 
  WCHAR m_wcCurrentChar;
  BOOL m_fCurrentCharIsEscaped;
- BOOL m_fSupportEscaping;
- BOOL m_fReadRawCharacter;
  };
 
 #include "stringlexer.inl"

diff --git a/src/coreclr/binder/inc/stringlexer.inl b/src/coreclr/binder/inc/stringlexer.inl
@@ -25,12 +25,10 @@ StringLexer::~StringLexer()
  // Nothing to do here
 }
 
-void StringLexer::Init(SString &inputString, BOOL fSupportEscaping)
+void StringLexer::Init(SString &inputString) // Points the lexer at inputString: m_cursor is set to Begin() and m_end to End()
 {
  m_cursor = inputString.Begin();
  m_end = inputString.End();
- m_fSupportEscaping = fSupportEscaping;
- m_fReadRawCharacter = FALSE;
 }
 
 BOOL StringLexer::IsWhitespace(WCHAR wcChar)
@@ -55,6 +53,7 @@ WCHAR StringLexer::PopCharacter(BOOL *pfIsEscaped)
  {
  m_wcCurrentChar = INVALID_CHARACTER;
  *pfIsEscaped = m_fCurrentCharIsEscaped;
+ m_cursor++;
  }
  else
  {
@@ -71,172 +70,63 @@ void StringLexer::PushCharacter(WCHAR wcCurrentChar,
 
  m_wcCurrentChar = wcCurrentChar;
  m_fCurrentCharIsEscaped = fIsEscaped;
+ m_cursor--;
 }
 
 WCHAR StringLexer::GetRawCharacter()
 {
  WCHAR wcCurrentChar = 0;
 
- if (m_cursor <= m_end)
+ if (m_cursor < m_end) // strict '<': only positions before m_end are read
  {
  wcCurrentChar = m_cursor[0];
- m_fReadRawCharacter = TRUE;
  m_cursor++;
- }
- else
- {
- m_fReadRawCharacter = FALSE;
- }
-
- return wcCurrentChar;
-}
-
-void StringLexer::PushRawCharacter()
-{
- if (m_fReadRawCharacter)
- {
- m_cursor--;
- m_fReadRawCharacter = FALSE;
- }
-}
 
-WCHAR StringLexer::DecodeUTF16Character()
-{
- // See http://www.ietf.org/rfc/rfc2781.txt for details on UTF-16 encoding.
-
- WCHAR wcCurrentChar = 0;
- SCOUNT_T nCharacters = m_end - m_cursor + 1;
- WCHAR wcChar1 = GetRawCharacter();
-
- if (wcChar1 < 0xd800)
- {
- wcCurrentChar = wcChar1;
+ // Do not allow \0 anywhere in the string: a NUL read before m_end is turned into INVALID_CHARACTER, so a returned 0 always means end of input.
+ if (wcCurrentChar == 0)
+ {
+ wcCurrentChar = INVALID_CHARACTER;
+ }
  }
  else
  {
- // StringLexer is not designed to handle UTF-16 characters beyond the Basic Multilingual Plane,
- // since it stores all characters in 16-bit WCHARs.
- // However, since the vast majority of the time, we (Microsoft) produce the manifests,
- // this is likely a non-scenario, as the other Unicode planes would never be used in practice.
-
- if (wcChar1 <= 0xdbff) // 0xd800 - 0xdbff indicates the first WCHAR of a surrogate pair
- {
- if (nCharacters >= 2)
- {
- GetRawCharacter(); // Skip the second WCHAR of the surrogate pair
- }
- }
- // Otherwise, the character is either in the 0xdc00 - 0xdfff range, indicating the second WCHAR of a surrogate pair,
- // or in the 0xE000 - 0xFFFF range, which has within it ranges of invalid characters, and which we conservatively treat
- // as invalid.
-
- wcCurrentChar = INVALID_CHARACTER;
+ // EOS: the cursor has reached m_end; it is not advanced and 0 is returned
+ wcCurrentChar = 0;
  }
 
  return wcCurrentChar;
 }
 
-
 WCHAR StringLexer::GetNextCharacter(BOOL *pfIsEscaped)
 {
  *pfIsEscaped = FALSE;
 
- WCHAR wcCurrentChar = GetRawCharacter(); // DecodeUTF16Character()
+ WCHAR wcCurrentChar = GetRawCharacter(); // next raw character; a backslash starts an escape sequence
  if (wcCurrentChar == L'\\')
  {
- WCHAR wcTempChar = GetRawCharacter(); // DecodeUTF16Character()
+ WCHAR wcTempChar = GetRawCharacter(); // the character that follows the backslash
 
- if (m_fSupportEscaping)
- {
- // Handle standard escapes
- switch (wcTempChar)
- {
- case L'"':
- case L'\'':
- case L',':
- case L'\\':
- case L'/':
- case L'=':
- break;
- case L't':
- wcTempChar = 9;
- break;
- case L'n':
- wcTempChar = 10;
- break;
- case L'r':
- wcTempChar = 13;
- break;
- case L'u':
- wcTempChar = ParseUnicode();
- break;
- default:
- return INVALID_CHARACTER;
- }
-
- *pfIsEscaped = TRUE;
- wcCurrentChar = wcTempChar;
- }
- else
- {
- // Do not handle escapes except for quotes
- switch (wcTempChar)
- {
- case L'"':
- case L'\'':
- *pfIsEscaped = TRUE;
- wcCurrentChar = wcTempChar;
- break;
- default:
- PushRawCharacter();
- break;
- }
- }
- }
-
- return wcCurrentChar;
-}
-
-WCHAR StringLexer::ParseUnicode()
-{
- int nCharacters = 0;
- WCHAR wcUnicodeChar = 0;
-
- for(;;)
- {
- WCHAR wcCurrentChar = DecodeUTF16Character();
- nCharacters++;
-
- if (wcCurrentChar == L';')
+ // Handle standard escapes: \t, \n and \r decode to tab, line feed and carriage return; quote, apostrophe, comma, backslash and '=' stand for themselves
+ switch (wcTempChar)
  {
+ case L't': wcTempChar = L'\t'; break;
+ case L'n': wcTempChar = L'\n'; break;
+ case L'r': wcTempChar = L'\r'; break;
+ case L'"':
+ case L'\'':
+ case L',':
+ case L'\\':
+ case L'=':
  break;
- }
- else if ((wcCurrentChar == INVALID_CHARACTER) || (nCharacters >= 9))
- {
+ default: // any other character after a backslash is rejected
  return INVALID_CHARACTER;
  }
 
- wcUnicodeChar <<= 4;
-
- if ((wcCurrentChar >= L'0') && (wcCurrentChar <= L'9'))
- {
- wcUnicodeChar += (wcCurrentChar - L'0');
- }
- else if ((wcCurrentChar >= L'a') && (wcCurrentChar <= L'f'))
- {
- wcUnicodeChar += (wcCurrentChar - L'a') + 10;
- }
- else if ((wcCurrentChar >= L'A') && (wcCurrentChar <= L'F'))
- {
- wcUnicodeChar += (wcCurrentChar - L'A') + 10;
- }
- else
- {
- return INVALID_CHARACTER;
- }
+ *pfIsEscaped = TRUE; // tell the caller this character came from an escape sequence
+ wcCurrentChar = wcTempChar; // hand back the decoded character instead of the backslash
  }
 
- return wcUnicodeChar;
+ return wcCurrentChar; // the raw character, or the decoded value of an accepted escape
 }
 
 #endif
diff --git a/src/coreclr/binder/inc/textualidentityparser.hpp b/src/coreclr/binder/inc/textualidentityparser.hpp
@@ -28,12 +28,9 @@ namespace BINDER_SPACE
  TextualIdentityParser(AssemblyIdentity *pAssemblyIdentity);
  ~TextualIdentityParser();
 
- virtual BOOL IsSeparatorChar(WCHAR wcChar);
- virtual StringLexer::LEXEME_TYPE GetLexemeType(WCHAR wcChar);
-
  static HRESULT Parse(/* in */ SString &textualIdentity,
- /* out */ AssemblyIdentity *pAssemblyIdentity,
- /* in */ BOOL fPermitUnescapedQuotes = FALSE);
+ /* out */ AssemblyIdentity *pAssemblyIdentity);
+
  static HRESULT ToString(/* in */ AssemblyIdentity *pAssemblyIdentity,
  /* in */ DWORD dwIdentityFlags,
  /* out */ SString &textualIdentity);
@@ -45,15 +42,15 @@ namespace BINDER_SPACE
  /* in */ BOOL fValidateHex,
  /* in */ BOOL fIsToken,
  /* out */ SBuffer &publicKeyOrTokenBLOB);
+
  static void BlobToHex(/* in */ SBuffer &publicKeyOrTokenBLOB,
  /* out */ SString &publicKeyOrToken);
 
  BOOL ParseString(/* in */ SString &textualString,
  /* out */ SString &contentString);
 
  protected:
- BOOL Parse(/* in */ SString &textualIdentity,
- /* in */ BOOL fPermitUnescapedQuotes = FALSE);
+ BOOL Parse(/* in */ SString &textualIdentity);
 
  BOOL PopulateAssemblyIdentity(/* in */ SString &attributeString,
  /* in */ SString &valueString);

diff --git a/src/coreclr/binder/stringlexer.cpp b/src/coreclr/binder/stringlexer.cpp
@@ -19,7 +19,7 @@
 namespace BINDER_SPACE
 {
  StringLexer::LEXEME_TYPE
- StringLexer::GetNextLexeme(SString &currentString, BOOL fPermitUnescapedQuotes)
+ StringLexer::GetNextLexeme(SString &currentString)
  {
  BOOL fIsEscaped = FALSE;
  WCHAR wcCurrentChar = INVALID_CHARACTER;
@@ -43,11 +43,11 @@ namespace BINDER_SPACE
 
  // First character of string lexeme; push it back
  PushCharacter(wcCurrentChar, fIsEscaped);
- return ParseString(currentString, fPermitUnescapedQuotes);
+ return ParseString(currentString);
  }
 
  StringLexer::LEXEME_TYPE
- StringLexer::ParseString(SString &currentString, BOOL fPermitUnescapedQuotes)
+ StringLexer::ParseString(SString &currentString)
  {
  BOOL fIsFirstCharacter = TRUE;
  WCHAR wcCurrentChar = INVALID_CHARACTER;
@@ -99,7 +99,7 @@ namespace BINDER_SPACE
  break;
  }
 
- if (!fPermitUnescapedQuotes && !fIsEscaped && IsQuoteCharacter(wcCurrentChar) && !IsQuoteCharacter(wcOpeningQuote))
+ if (!fIsEscaped && IsQuoteCharacter(wcCurrentChar) && !IsQuoteCharacter(wcOpeningQuote))
  {
  // Unescaped quotes in the middle of the string are an error
  return LEXEME_TYPE_INVALID;
@@ -147,4 +147,24 @@ namespace BINDER_SPACE
  currentString.Truncate(cursor + 1);
  }
  }
+
+ BOOL StringLexer::IsSeparatorChar(WCHAR wcChar) // TRUE only when wcChar is ',' or '='
+ {
+ return ((wcChar == W(',')) || (wcChar == W('=')));
+ }
+
+ StringLexer::LEXEME_TYPE StringLexer::GetLexemeType(WCHAR wcChar) // maps a single character to the lexeme type it denotes
+ {
+ switch (wcChar)
+ {
+ case W('='):
+ return LEXEME_TYPE_EQUALS;
+ case W(','):
+ return LEXEME_TYPE_COMMA;
+ case 0: // a zero character maps to end-of-stream
+ return LEXEME_TYPE_END_OF_STREAM;
+ default: // every other character is classified as a string lexeme
+ return LEXEME_TYPE_STRING;
+ }
+ }
 };