Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit c553192

Browse files
MichalStrehovsky authored and jkotas committed
Add support for invariant casing in PAL (#24597)
unicodedata.cpp based on UnicodeData.txt v11.0.
1 parent eae74b8 commit c553192

File tree

9 files changed

+2726
-2
lines changed

9 files changed

+2726
-2
lines changed

src/pal/inc/pal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4618,6 +4618,9 @@ PALIMPORT DLLEXPORT int __cdecl _putenv(const char *);
46184618

46194619
#define ERANGE 34
46204620

4621+
PALIMPORT WCHAR __cdecl PAL_ToUpperInvariant(WCHAR);
4622+
PALIMPORT WCHAR __cdecl PAL_ToLowerInvariant(WCHAR);
4623+
46214624
/******************* PAL-specific I/O completion port *****************/
46224625

46234626
typedef struct _PAL_IOCP_CPU_INFORMATION {

src/pal/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ set(SOURCES
212212
loader/module.cpp
213213
loader/modulename.cpp
214214
locale/unicode.cpp
215+
locale/unicodedata.cpp
215216
locale/utf8.cpp
216217
map/common.cpp
217218
map/map.cpp

src/pal/src/include/pal/unicodedata.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
#ifndef _PAL_UNICODEDATA_H_
6+
#define _PAL_UNICODEDATA_H_
7+
8+
#include "pal/palinternal.h"
9+
10+
#ifdef __cplusplus
11+
extern "C"
12+
{
13+
#endif // __cplusplus
14+
15+
#define UPPER_CASE 1
16+
#define LOWER_CASE 2
17+
18+
// One entry of the UnicodeData table that is searched with bsearch
// (keyed on nUnicodeValue) to perform invariant case conversion.
typedef struct
19+
{
20+
WCHAR nUnicodeValue; // Character this entry describes; the search key.
21+
WORD nFlag; // Compared against UPPER_CASE / LOWER_CASE by the casing routines.
22+
WCHAR nOpposingCase; // Character returned when a case conversion applies to this entry.
23+
} UnicodeDataRec;
24+
25+
extern CONST UnicodeDataRec UnicodeData[];
26+
extern CONST UINT UNICODE_DATA_SIZE;
27+
28+
#ifdef __cplusplus
29+
}
30+
#endif // __cplusplus
31+
32+
#endif /* _PAL_UNICODEDATA_H_ */

src/pal/src/locale/unicode.cpp

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Revision History:
2929
#include "pal/locale.h"
3030
#include "pal/cruntime.h"
3131
#include "pal/stackstring.hpp"
32+
#include "pal/unicodedata.h"
3233

3334
#if !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION)
3435
#error Either pthread rwlocks or Core Foundation are required for Unicode support
@@ -79,6 +80,138 @@ static const CP_MAPPING CP_TO_NATIVE_TABLE[] = {
7980
// - We want Ansi marshalling to mean marshal to UTF-8 on Mac and Linux
8081
static const UINT PAL_ACP = 65001;
8182

83+
/*++
84+
Function:
85+
UnicodeDataComp
86+
This is the comparison function used by the bsearch function to search
87+
for unicode characters in the UnicodeData array.
88+
89+
Parameter:
90+
pnKey
91+
The unicode character value to search for.
92+
elem
93+
A pointer to a UnicodeDataRec.
94+
95+
Return value:
96+
<0 if pnKey < elem->nUnicodeValue
97+
0 if pnKey == elem->nUnicodeValue
98+
>0 if pnKey > elem->nUnicodeValue
99+
--*/
100+
static int UnicodeDataComp(const void *pnKey, const void *elem)
101+
{
102+
WCHAR uValue = ((UnicodeDataRec*)elem)->nUnicodeValue;
103+
104+
if (*((INT*)pnKey) < uValue)
105+
{
106+
return -1;
107+
}
108+
else if (*((INT*)pnKey) > uValue)
109+
{
110+
return 1;
111+
}
112+
else
113+
{
114+
return 0;
115+
}
116+
}
117+
118+
/*++
119+
Function:
120+
GetUnicodeData
121+
This function is used to get information about a Unicode character.
122+
123+
Parameters:
124+
nUnicodeValue
125+
The numeric value of the Unicode character to get information about.
126+
pDataRec
127+
The UnicodeDataRec to fill in with the data for the Unicode character.
128+
129+
Return value:
130+
TRUE if the Unicode character was found.
131+
132+
--*/
133+
BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec)
134+
{
135+
BOOL bRet;
136+
137+
UnicodeDataRec *dataRec;
138+
INT nNumOfChars = UNICODE_DATA_SIZE;
139+
dataRec = (UnicodeDataRec *) bsearch(&nUnicodeValue, UnicodeData, nNumOfChars,
140+
sizeof(UnicodeDataRec), UnicodeDataComp);
141+
if (dataRec == NULL)
142+
{
143+
bRet = FALSE;
144+
}
145+
else
146+
{
147+
bRet = TRUE;
148+
*pDataRec = *dataRec;
149+
}
150+
return bRet;
151+
}
152+
153+
/*++
Function:
  PAL_ToUpperInvariant

  Converts a character to upper case using the UnicodeData array.

Parameters:
  c
    The character to convert.

Return value:
  The nOpposingCase value of the character's UnicodeData entry when that
  entry is flagged LOWER_CASE; otherwise c unchanged (including when the
  character has no entry in the array).
--*/
wchar_16
__cdecl
PAL_ToUpperInvariant( wchar_16 c )
{
    UnicodeDataRec record;
    wchar_16 result = c;   // default: hand the input back untouched

    PERF_ENTRY(PAL_ToUpperInvariant);
    ENTRY("PAL_ToUpperInvariant (c=%d)\n", c);

    if (!GetUnicodeData(c, &record))
    {
        TRACE( "Unable to retrieve unicode data for the character %c.\n", c );
    }
    else if ( record.nFlag == LOWER_CASE )
    {
        // Only lower-case characters have an upper-case counterpart to map to.
        result = record.nOpposingCase;
    }

    LOGEXIT("PAL_ToUpperInvariant returns int %d\n", result );
    PERF_EXIT(PAL_ToUpperInvariant);
    return result;
}
183+
184+
/*++
Function:
  PAL_ToLowerInvariant

  Converts a character to lower case using the UnicodeData array.

Parameters:
  c
    The character to convert.

Return value:
  The nOpposingCase value of the character's UnicodeData entry when that
  entry is flagged UPPER_CASE; otherwise c unchanged (including when the
  character has no entry in the array).
--*/
wchar_16
__cdecl
PAL_ToLowerInvariant( wchar_16 c )
{
    UnicodeDataRec record;
    wchar_16 result = c;   // default: hand the input back untouched

    PERF_ENTRY(PAL_ToLowerInvariant);
    ENTRY("PAL_ToLowerInvariant (c=%d)\n", c);

    if (!GetUnicodeData(c, &record))
    {
        TRACE( "Unable to retrieve unicode data for the character %c.\n", c );
    }
    else if ( record.nFlag == UPPER_CASE )
    {
        // Only upper-case characters have a lower-case counterpart to map to.
        result = record.nOpposingCase;
    }

    LOGEXIT("PAL_ToLowerInvariant returns int %d\n", result );
    PERF_EXIT(PAL_ToLowerInvariant);
    return result;
}
214+
82215
/*++
83216
Function:
84217
CODEPAGEGetData

0 commit comments

Comments
 (0)