Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Add support for invariant casing in PAL (#24597)
Browse files Browse the repository at this point in the history
unicodedata.cpp based on UnicodeData.txt v11.0.
  • Loading branch information
MichalStrehovsky authored and jkotas committed May 17, 2019
1 parent eae74b8 commit c553192
Show file tree
Hide file tree
Showing 9 changed files with 2,726 additions and 2 deletions.
3 changes: 3 additions & 0 deletions src/pal/inc/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -4618,6 +4618,9 @@ PALIMPORT DLLEXPORT int __cdecl _putenv(const char *);

#define ERANGE 34

// Invariant case conversion of a single WCHAR. Each function returns the
// opposite-case counterpart recorded in the PAL's UnicodeData table when the
// input is flagged as having the other case; any other input (including one
// with no table entry) is returned unchanged.
PALIMPORT WCHAR __cdecl PAL_ToUpperInvariant(WCHAR);
PALIMPORT WCHAR __cdecl PAL_ToLowerInvariant(WCHAR);

/******************* PAL-specific I/O completion port *****************/

typedef struct _PAL_IOCP_CPU_INFORMATION {
Expand Down
1 change: 1 addition & 0 deletions src/pal/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ set(SOURCES
loader/module.cpp
loader/modulename.cpp
locale/unicode.cpp
locale/unicodedata.cpp
locale/utf8.cpp
map/common.cpp
map/map.cpp
Expand Down
32 changes: 32 additions & 0 deletions src/pal/src/include/pal/unicodedata.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#ifndef _PAL_UNICODEDATA_H_
#define _PAL_UNICODEDATA_H_

#include "pal/palinternal.h"

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus

// Values that UnicodeDataRec::nFlag is compared against by the invariant
// casing functions to decide whether a character has an opposing case.
#define UPPER_CASE 1
#define LOWER_CASE 2

// One entry of the case-mapping table.
typedef struct
{
// Character this entry describes; this is the key used for lookups.
WCHAR nUnicodeValue;
// Case of nUnicodeValue (compared against UPPER_CASE / LOWER_CASE).
WORD nFlag;
// Character returned when nUnicodeValue is converted to the other case.
WCHAR nOpposingCase;
} UnicodeDataRec;

// Case-mapping table (presumably defined in locale/unicodedata.cpp -- confirm).
// GetUnicodeData looks entries up with bsearch keyed on nUnicodeValue, so the
// entries must be sorted in ascending nUnicodeValue order.
extern CONST UnicodeDataRec UnicodeData[];
// Number of entries in UnicodeData; passed to bsearch as the element count.
extern CONST UINT UNICODE_DATA_SIZE;

#ifdef __cplusplus
}
#endif // __cplusplus

#endif /* _PAL_UNICODEDATA_H_ */
133 changes: 133 additions & 0 deletions src/pal/src/locale/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Revision History:
#include "pal/locale.h"
#include "pal/cruntime.h"
#include "pal/stackstring.hpp"
#include "pal/unicodedata.h"

#if !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION)
#error Either pthread rwlocks or Core Foundation are required for Unicode support
Expand Down Expand Up @@ -79,6 +80,138 @@ static const CP_MAPPING CP_TO_NATIVE_TABLE[] = {
// - We want Ansi marshalling to mean marshal to UTF-8 on Mac and Linux
static const UINT PAL_ACP = 65001;

/*++
Function:
UnicodeDataComp
This is the comparison function used by the bsearch function to search
for unicode characters in the UnicodeData array.
Parameter:
pnKey
The unicode character value to search for.
elem
A pointer to a UnicodeDataRec.
Return value:
<0 if pnKey < elem->nUnicodeValue
0 if pnKey == elem->nUnicodeValue
>0 if pnKey > elem->nUnicodeValue
--*/
static int UnicodeDataComp(const void *pnKey, const void *elem)
{
WCHAR uValue = ((UnicodeDataRec*)elem)->nUnicodeValue;

if (*((INT*)pnKey) < uValue)
{
return -1;
}
else if (*((INT*)pnKey) > uValue)
{
return 1;
}
else
{
return 0;
}
}

/*++
Function:
  GetUnicodeData

  Looks up the UnicodeData table entry for a Unicode character.

Parameters:
  nUnicodeValue
    Numeric value of the Unicode character to look up.
  pDataRec
    Receives a copy of the matching UnicodeDataRec. It is left untouched
    when no entry is found.

Return value:
  TRUE if the character has an entry in the table, FALSE otherwise.
--*/
BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec)
{
    INT nEntries = UNICODE_DATA_SIZE;
    UnicodeDataRec *pMatch = (UnicodeDataRec *) bsearch(&nUnicodeValue, UnicodeData, nEntries,
                                                        sizeof(UnicodeDataRec), UnicodeDataComp);

    // bsearch yields NULL when no element compares equal to the key.
    if (pMatch == NULL)
    {
        return FALSE;
    }

    *pDataRec = *pMatch;
    return TRUE;
}

/*++
Function:
  PAL_ToUpperInvariant

  Converts a character to upper case using the UnicodeData table.

Parameters:
  c
    The character to convert.

Return value:
  The opposing-case character recorded in the table when c has an entry
  flagged LOWER_CASE; otherwise c itself (no entry, or not lower case).
--*/
wchar_16
__cdecl
PAL_ToUpperInvariant( wchar_16 c )
{
    UnicodeDataRec charInfo;
    wchar_16 result = c;

    PERF_ENTRY(PAL_ToUpperInvariant);
    ENTRY("PAL_ToUpperInvariant (c=%d)\n", c);

    if (!GetUnicodeData(c, &charInfo))
    {
        TRACE( "Unable to retrieve unicode data for the character %c.\n", c );
    }
    else if ( charInfo.nFlag == LOWER_CASE )
    {
        // Only characters flagged as lower case are mapped.
        result = charInfo.nOpposingCase;
    }

    LOGEXIT("PAL_ToUpperInvariant returns int %d\n", result );
    PERF_EXIT(PAL_ToUpperInvariant);
    return result;
}

/*++
Function:
  PAL_ToLowerInvariant

  Converts a character to lower case using the UnicodeData table.

Parameters:
  c
    The character to convert.

Return value:
  The opposing-case character recorded in the table when c has an entry
  flagged UPPER_CASE; otherwise c itself (no entry, or not upper case).
--*/
wchar_16
__cdecl
PAL_ToLowerInvariant( wchar_16 c )
{
    UnicodeDataRec charInfo;
    wchar_16 result = c;

    PERF_ENTRY(PAL_ToLowerInvariant);
    ENTRY("PAL_ToLowerInvariant (c=%d)\n", c);

    if (!GetUnicodeData(c, &charInfo))
    {
        TRACE( "Unable to retrieve unicode data for the character %c.\n", c );
    }
    else if ( charInfo.nFlag == UPPER_CASE )
    {
        // Only characters flagged as upper case are mapped.
        result = charInfo.nOpposingCase;
    }

    LOGEXIT("PAL_ToLowerInvariant returns int %d\n", result );
    PERF_EXIT(PAL_ToLowerInvariant);
    return result;
}

/*++
Function:
CODEPAGEGetData
Expand Down
Loading

0 comments on commit c553192

Please sign in to comment.