Skip to content

Commit 2f2aaae

Browse files
authored
Share UTF8 converters between coreclr and mono (#85558)
* Share UTF8 converters between coreclr and mono - v1
* Revert "Share UTF8 converters between coreclr and mono - v1" (this reverts commit f9845ac6f53dc95fb747eb21351dfa9412397217)
* Share UTF8 converters between coreclr and mono - v2
* Remove C++ runtime dependency
* Initial C++ to C conversion
* Delete unused macros
* Fix custom alloc in mono
* Error on invalid sequences when caller requested
* Remove count from convert APIs
1 parent 28151b5 commit 2f2aaae

File tree

14 files changed

+2378
-3633
lines changed

14 files changed

+2378
-3633
lines changed

src/coreclr/inc/utilcode.h

-179
Original file line numberDiff line numberDiff line change
@@ -185,15 +185,6 @@ typedef LPSTR LPUTF8;
185185
// given an ANSI string, copy it into a wide buffer.
186186
// be careful about scoping when using this macro!
187187
//
188-
// how to use the below two macros:
189-
//
190-
// ...
191-
// LPSTR pszA;
192-
// pszA = MyGetAnsiStringRoutine();
193-
// MAKE_WIDEPTR_FROMANSI(pwsz, pszA);
194-
// MyUseWideStringRoutine(pwsz);
195-
// ...
196-
//
197188
// Similarly for MAKE_ANSIPTR_FROMWIDE. Note that the first param does not
198189
// have to be declared, and no clean up must be done.
199190
//
@@ -211,25 +202,6 @@ typedef LPSTR LPUTF8;
211202
#define MAKE_TRANSLATIONFAILED ThrowWin32(ERROR_NO_UNICODE_TRANSLATION)
212203
#endif
213204

214-
// This version throws on conversion errors (ie, no best fit character
215-
// mapping to characters that look similar, and no use of the default char
216-
// ('?') when printing out unrepresentable characters. Use this method for
217-
// most development in the EE, especially anything like metadata or class
218-
// names. See the BESTFIT version if you're printing out info to the console.
219-
#define MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, codepage) \
220-
int __l##ptrname = (int)u16_strlen(widestr); \
221-
if (__l##ptrname > MAKE_MAX_LENGTH) \
222-
MAKE_TOOLONGACTION; \
223-
__l##ptrname = (int)((__l##ptrname + 1) * 2 * sizeof(char)); \
224-
CQuickBytes __CQuickBytes##ptrname; \
225-
__CQuickBytes##ptrname.AllocThrows(__l##ptrname); \
226-
BOOL __b##ptrname; \
227-
DWORD __cBytes##ptrname = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, -1, (LPSTR)__CQuickBytes##ptrname.Ptr(), __l##ptrname, NULL, &__b##ptrname); \
228-
if (__b##ptrname || (__cBytes##ptrname == 0 && (widestr[0] != W('\0')))) { \
229-
MAKE_TRANSLATIONFAILED; \
230-
} \
231-
LPSTR ptrname = (LPSTR)__CQuickBytes##ptrname.Ptr()
232-
233205
// This version does best fit character mapping and also allows the use
234206
// of the default char ('?') for any Unicode character that isn't
235207
// representable. This is reasonable for writing to the console, but
@@ -247,40 +219,6 @@ typedef LPSTR LPUTF8;
247219
} \
248220
LPSTR ptrname = (LPSTR)__CQuickBytes##ptrname.Ptr()
249221

250-
// Use for anything critical other than output to console, where weird
251-
// character mappings are unacceptable.
252-
#define MAKE_ANSIPTR_FROMWIDE(ptrname, widestr) MAKE_MULTIBYTE_FROMWIDE(ptrname, widestr, CP_ACP)
253-
254-
// Use for output to the console.
255-
#define MAKE_ANSIPTR_FROMWIDE_BESTFIT(ptrname, widestr) MAKE_MULTIBYTE_FROMWIDE_BESTFIT(ptrname, widestr, CP_ACP)
256-
257-
#define MAKE_WIDEPTR_FROMANSI(ptrname, ansistr) \
258-
CQuickBytes __qb##ptrname; \
259-
int __l##ptrname; \
260-
__l##ptrname = WszMultiByteToWideChar(CP_ACP, 0, ansistr, -1, 0, 0); \
261-
if (__l##ptrname > MAKE_MAX_LENGTH) \
262-
MAKE_TOOLONGACTION; \
263-
LPWSTR ptrname = (LPWSTR) __qb##ptrname.AllocThrows((__l##ptrname+1)*sizeof(WCHAR)); \
264-
if (WszMultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, ansistr, -1, ptrname, __l##ptrname) == 0) { \
265-
MAKE_TRANSLATIONFAILED; \
266-
}
267-
268-
#define MAKE_WIDEPTR_FROMANSI_NOTHROW(ptrname, ansistr) \
269-
CQuickBytes __qb##ptrname; \
270-
LPWSTR ptrname = 0; \
271-
int __l##ptrname; \
272-
__l##ptrname = WszMultiByteToWideChar(CP_ACP, 0, ansistr, -1, 0, 0); \
273-
if (__l##ptrname <= MAKE_MAX_LENGTH) { \
274-
ptrname = (LPWSTR) __qb##ptrname.AllocNoThrow((__l##ptrname+1)*sizeof(WCHAR)); \
275-
if (ptrname) { \
276-
if (WszMultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, ansistr, -1, ptrname, __l##ptrname) != 0) { \
277-
ptrname[__l##ptrname] = 0; \
278-
} else { \
279-
ptrname = 0; \
280-
} \
281-
} \
282-
}
283-
284222
#define MAKE_UTF8PTR_FROMWIDE(ptrname, widestr) CQuickBytes _##ptrname; _##ptrname.ConvertUnicode_Utf8(widestr); LPSTR ptrname = (LPSTR) _##ptrname.Ptr();
285223

286224
#define MAKE_UTF8PTR_FROMWIDE_NOTHROW(ptrname, widestr) \
@@ -312,22 +250,8 @@ typedef LPSTR LPUTF8;
312250
} \
313251
} \
314252

315-
#define MAKE_WIDEPTR_FROMUTF8N(ptrname, utf8str, n8chrs) \
316-
CQuickBytes __qb##ptrname; \
317-
int __l##ptrname; \
318-
__l##ptrname = WszMultiByteToWideChar(CP_UTF8, 0, utf8str, n8chrs, 0, 0); \
319-
if (__l##ptrname > MAKE_MAX_LENGTH) \
320-
MAKE_TOOLONGACTION; \
321-
LPWSTR ptrname = (LPWSTR) __qb##ptrname .AllocThrows((__l##ptrname+1)*sizeof(WCHAR)); \
322-
if (0==WszMultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8str, n8chrs, ptrname, __l##ptrname)) { \
323-
MAKE_TRANSLATIONFAILED; \
324-
} \
325-
ptrname[__l##ptrname] = 0;
326-
327-
328253
#define MAKE_WIDEPTR_FROMUTF8(ptrname, utf8str) CQuickBytes _##ptrname; _##ptrname.ConvertUtf8_Unicode(utf8str); LPCWSTR ptrname = (LPCWSTR) _##ptrname.Ptr();
329254

330-
331255
#define MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, n8chrs) \
332256
CQuickBytes __qb##ptrname; \
333257
int __l##ptrname; \
@@ -346,42 +270,10 @@ typedef LPSTR LPUTF8;
346270

347271
#define MAKE_WIDEPTR_FROMUTF8_NOTHROW(ptrname, utf8str) MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, -1)
348272

349-
// This method takes the number of characters
350-
#define MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, codepage) \
351-
CQuickBytes __qb##ptrname; \
352-
int __l##ptrname; \
353-
__l##ptrname = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, _nCharacters, NULL, 0, NULL, NULL); \
354-
if (__l##ptrname > MAKE_MAX_LENGTH) \
355-
MAKE_TOOLONGACTION; \
356-
ptrname = (LPUTF8) __qb##ptrname .AllocThrows(__l##ptrname+1); \
357-
BOOL __b##ptrname; \
358-
DWORD _pCnt = WszWideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, widestr, _nCharacters, ptrname, __l##ptrname, NULL, &__b##ptrname); \
359-
if (__b##ptrname || (_pCnt == 0 && _nCharacters > 0)) { \
360-
MAKE_TRANSLATIONFAILED; \
361-
} \
362-
ptrname[__l##ptrname] = 0;
363-
364-
#define MAKE_MULTIBYTE_FROMWIDEN_BESTFIT(ptrname, widestr, _nCharacters, _pCnt, codepage) \
365-
CQuickBytes __qb##ptrname; \
366-
int __l##ptrname; \
367-
__l##ptrname = WszWideCharToMultiByte(codepage, 0, widestr, _nCharacters, NULL, 0, NULL, NULL); \
368-
if (__l##ptrname > MAKE_MAX_LENGTH) \
369-
MAKE_TOOLONGACTION; \
370-
ptrname = (LPUTF8) __qb##ptrname .AllocThrows(__l##ptrname+1); \
371-
DWORD _pCnt = WszWideCharToMultiByte(codepage, 0, widestr, _nCharacters, ptrname, __l##ptrname, NULL, NULL); \
372-
if (_pCnt == 0 && _nCharacters > 0) { \
373-
MAKE_TRANSLATIONFAILED; \
374-
} \
375-
ptrname[__l##ptrname] = 0;
376-
377-
#define MAKE_ANSIPTR_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt) \
378-
MAKE_MULTIBYTE_FROMWIDEN(ptrname, widestr, _nCharacters, _pCnt, CP_ACP)
379-
380273
const SIZE_T MaxSigned32BitDecString = ARRAY_SIZE("-2147483648") - 1;
381274
const SIZE_T MaxUnsigned32BitDecString = ARRAY_SIZE("4294967295") - 1;
382275
const SIZE_T MaxIntegerDecHexString = ARRAY_SIZE("-9223372036854775808") - 1;
383276

384-
const SIZE_T Max16BitHexString = ARRAY_SIZE("1234") - 1;
385277
const SIZE_T Max32BitHexString = ARRAY_SIZE("12345678") - 1;
386278
const SIZE_T Max64BitHexString = ARRAY_SIZE("1234567812345678") - 1;
387279

@@ -410,77 +302,6 @@ inline WCHAR* FormatInteger(WCHAR* str, size_t strCount, const char* fmt, I v)
410302
return str;
411303
}
412304

413-
inline
414-
LPWSTR DuplicateString(
415-
LPCWSTR wszString,
416-
size_t cchString)
417-
{
418-
STATIC_CONTRACT_NOTHROW;
419-
420-
LPWSTR wszDup = NULL;
421-
if (wszString != NULL)
422-
{
423-
wszDup = new (nothrow) WCHAR[cchString + 1];
424-
if (wszDup != NULL)
425-
{
426-
wcscpy_s(wszDup, cchString + 1, wszString);
427-
}
428-
}
429-
return wszDup;
430-
}
431-
432-
inline
433-
LPWSTR DuplicateString(
434-
LPCWSTR wszString)
435-
{
436-
STATIC_CONTRACT_NOTHROW;
437-
438-
if (wszString != NULL)
439-
{
440-
return DuplicateString(wszString, u16_strlen(wszString));
441-
}
442-
else
443-
{
444-
return NULL;
445-
}
446-
}
447-
448-
void DECLSPEC_NORETURN ThrowOutOfMemory();
449-
450-
inline
451-
LPWSTR DuplicateStringThrowing(
452-
LPCWSTR wszString,
453-
size_t cchString)
454-
{
455-
STATIC_CONTRACT_THROWS;
456-
457-
if (wszString == NULL)
458-
return NULL;
459-
460-
LPWSTR wszDup = DuplicateString(wszString, cchString);
461-
if (wszDup == NULL)
462-
ThrowOutOfMemory();
463-
464-
return wszDup;
465-
}
466-
467-
inline
468-
LPWSTR DuplicateStringThrowing(
469-
LPCWSTR wszString)
470-
{
471-
STATIC_CONTRACT_THROWS;
472-
473-
if (wszString == NULL)
474-
return NULL;
475-
476-
LPWSTR wszDup = DuplicateString(wszString);
477-
if (wszDup == NULL)
478-
ThrowOutOfMemory();
479-
480-
return wszDup;
481-
}
482-
483-
484305
//*****************************************************************************
485306
// Placement new is used to construct an object at an exact location. The pointer
486307
// is simply returned to the caller without actually using the heap. The

src/coreclr/pal/src/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ set(SOURCES
152152
loader/module.cpp
153153
locale/unicode.cpp
154154
locale/unicodedata.cpp
155-
locale/utf8.cpp
155+
${CLR_SRC_NATIVE_DIR}/minipal/utf8.c
156156
map/common.cpp
157157
map/map.cpp
158158
map/virtual.cpp

src/coreclr/pal/src/include/pal/utf8.h

-52
This file was deleted.

src/coreclr/pal/src/locale/unicode.cpp

+20-11
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Revision History:
2424
#include "pal/palinternal.h"
2525
#include "pal/dbgmsg.h"
2626
#include "pal/file.h"
27-
#include "pal/utf8.h"
27+
#include <minipal/utf8.h>
2828
#include "pal/cruntime.h"
2929
#include "pal/stackstring.hpp"
3030
#include "pal/unicodedata.h"
@@ -253,16 +253,20 @@ MultiByteToWideChar(
253253
goto EXIT;
254254
}
255255

256-
// Use UTF8ToUnicode on all systems, since it replaces
257-
// invalid characters and Core Foundation doesn't do that.
258256
if (CodePage == CP_UTF8 || CodePage == CP_ACP)
259257
{
260-
if (cbMultiByte <= -1)
258+
if (cbMultiByte < 0)
259+
cbMultiByte = strlen(lpMultiByteStr) + 1;
260+
261+
if (!lpWideCharStr || cchWideChar == 0)
262+
retval = minipal_get_length_utf8_to_utf16(lpMultiByteStr, cbMultiByte, dwFlags);
263+
264+
if (lpWideCharStr)
261265
{
262-
cbMultiByte = strlen(lpMultiByteStr) + 1;
266+
if (cchWideChar == 0) cchWideChar = retval;
267+
retval = minipal_convert_utf8_to_utf16(lpMultiByteStr, cbMultiByte, (CHAR16_T*)lpWideCharStr, cchWideChar, dwFlags);
263268
}
264269

265-
retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags);
266270
goto EXIT;
267271
}
268272

@@ -338,15 +342,20 @@ WideCharToMultiByte(
338342
defaultChar = *lpDefaultChar;
339343
}
340344

341-
// Use UnicodeToUTF8 on all systems because we use
342-
// UTF8ToUnicode in MultiByteToWideChar() on all systems.
343345
if (CodePage == CP_UTF8 || CodePage == CP_ACP)
344346
{
345-
if (cchWideChar == -1)
346-
{
347+
if (cchWideChar < 0)
347348
cchWideChar = PAL_wcslen(lpWideCharStr) + 1;
349+
350+
if (!lpMultiByteStr || cbMultiByte == 0)
351+
retval = minipal_get_length_utf16_to_utf8((CHAR16_T*)lpWideCharStr, cchWideChar, dwFlags);
352+
353+
if (lpMultiByteStr)
354+
{
355+
if (cbMultiByte == 0) cbMultiByte = retval;
356+
retval = minipal_convert_utf16_to_utf8((CHAR16_T*)lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte, dwFlags);
348357
}
349-
retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
358+
350359
goto EXIT;
351360
}
352361

0 commit comments

Comments
 (0)