From a0ba90a9d9d3b0ceb5711d65bda55b78ddbbef9e Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Wed, 26 Jun 2024 10:45:27 +0200 Subject: [PATCH] fix MultiByte <-> WideChar conversion return value on Linux MultiByteToWideChar/WideCharToMultiByte are supposed to include/exclude null termination based on whether the source string buffer is null-terminated. This was previously done by comparing the input byte/wide character count to the output byte/wide character count excluding null termination. This only works correctly if the input consists only of ASCII characters (i.e., only characters that are a single byte in UTF-8). This commit changes the code to explicitly check for a null terminator in the input and add one to the output size if the input was null-terminated. --- lib/DxcSupport/Unicode.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/DxcSupport/Unicode.cpp b/lib/DxcSupport/Unicode.cpp index 1481ae27ff..1392219085 100644 --- a/lib/DxcSupport/Unicode.cpp +++ b/lib/DxcSupport/Unicode.cpp @@ -54,7 +54,8 @@ int MultiByteToWideChar(uint32_t /*CodePage*/, uint32_t /*dwFlags*/, size_t rv; const char *prevLocale = setlocale(LC_ALL, nullptr); setlocale(LC_ALL, "en_US.UTF-8"); - if (lpMultiByteStr[cbMultiByte - 1] != '\0') { + const bool bIsNullTerminated = lpMultiByteStr[cbMultiByte - 1] == '\0'; + if (!bIsNullTerminated) { char *srcStr = (char *)malloc((cbMultiByte + 1) * sizeof(char)); strncpy(srcStr, lpMultiByteStr, cbMultiByte); srcStr[cbMultiByte] = '\0'; @@ -67,9 +68,9 @@ int MultiByteToWideChar(uint32_t /*CodePage*/, uint32_t /*dwFlags*/, if (prevLocale) setlocale(LC_ALL, prevLocale); - if (rv == (size_t)cbMultiByte) - return rv; - return rv + 1; // mbstowcs excludes the terminating character + if (bIsNullTerminated) + return rv + 1; // mbstowcs excludes the terminating character + return rv; } // WideCharToMultiByte is a Windows-specific method. 
@@ -110,7 +111,8 @@ int WideCharToMultiByte(uint32_t /*CodePage*/, uint32_t /*dwFlags*/, size_t rv; const char *prevLocale = setlocale(LC_ALL, nullptr); setlocale(LC_ALL, "en_US.UTF-8"); - if (lpWideCharStr[cchWideChar - 1] != L'\0') { + const bool bIsNullTerminated = lpWideCharStr[cchWideChar - 1] == L'\0'; + if (!bIsNullTerminated) { wchar_t *srcStr = (wchar_t *)malloc((cchWideChar + 1) * sizeof(wchar_t)); wcsncpy(srcStr, lpWideCharStr, cchWideChar); srcStr[cchWideChar] = L'\0'; @@ -123,9 +125,9 @@ int WideCharToMultiByte(uint32_t /*CodePage*/, uint32_t /*dwFlags*/, if (prevLocale) setlocale(LC_ALL, prevLocale); - if (rv == (size_t)cchWideChar) - return rv; - return rv + 1; // mbstowcs excludes the terminating character + if (bIsNullTerminated) + return rv + 1; // mbstowcs excludes the terminating character + return rv; } #endif // _WIN32