Skip to content

Commit

Permalink
Use utf conversions from minipal (#89036)
Browse files Browse the repository at this point in the history
* Support for utf conversion

* cast fix

* FB

* FB

* Update src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

* Apply suggestions from code review

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

* FB

* FB

* FB

---------

Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
LakshanF and jkotas authored Jul 21, 2023
1 parent 5bc100e commit c0d7d2d
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 36 deletions.
8 changes: 8 additions & 0 deletions src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@ set(AOT_EVENTPIPE_SHIM_DIR "${CMAKE_CURRENT_SOURCE_DIR}")

set (CONTAINER_SOURCES "")
set (CONTAINER_HEADERS "")
set (MINIPAL_SOURCES "")
set (EVENTPIPE_SOURCES "")
set (EVENTPIPE_HEADERS "")
set (GEN_EVENTPIPE_SOURCES "")

set (SHARED_CONTAINERS_SOURCE_PATH "${CLR_SRC_NATIVE_DIR}/containers")
set (SHARED_EVENTPIPE_SOURCE_PATH "${CLR_SRC_NATIVE_DIR}/eventpipe")
set (SHARED_MINIPAL_SOURCE_PATH "${CLR_SRC_NATIVE_DIR}/minipal")
include (${SHARED_EVENTPIPE_SOURCE_PATH}/eventpipe.cmake)
include (${SHARED_CONTAINERS_SOURCE_PATH}/containers.cmake)

list(APPEND MINIPAL_SOURCES
utf8.c
)

if(CLR_CMAKE_HOST_WIN32)
list(APPEND SHARED_DIAGNOSTIC_SERVER_SOURCES
ds-ipc-pal-namedpipe.c
Expand Down Expand Up @@ -50,6 +56,7 @@ list(APPEND EVENTPIPE_HEADERS

addprefix(CONTAINER_SOURCES ${SHARED_CONTAINERS_SOURCE_PATH} "${SHARED_CONTAINER_SOURCES}")
addprefix(CONTAINER_HEADERS ${SHARED_CONTAINERS_SOURCE_PATH} "${SHARED_CONTAINER_HEADERS}")
addprefix(MINIPAL_SOURCES ${SHARED_MINIPAL_SOURCE_PATH} "${MINIPAL_SOURCES}")

addprefix(EVENTPIPE_SOURCES ${SHARED_EVENTPIPE_SOURCE_PATH} "${EVENTPIPE_SOURCES}")
addprefix(EVENTPIPE_HEADERS ${SHARED_EVENTPIPE_SOURCE_PATH} "${EVENTPIPE_HEADERS}")
Expand Down Expand Up @@ -125,6 +132,7 @@ list(APPEND EVENTPIPE_SOURCES
${GEN_EVENTPIPE_SOURCES}
${CONTAINER_SOURCES}
${CONTAINER_HEADERS}
${MINIPAL_SOURCES}
)

list(APPEND AOT_EVENTPIPE_DISABLED_SOURCES
Expand Down
79 changes: 53 additions & 26 deletions src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <sys/time.h>
#endif

#include <minipal/utf8.h>

#include <eventpipe/ep-rt-config.h>
#ifdef ENABLE_PERFTRACING
#include <eventpipe/ep-thread.h>
Expand Down Expand Up @@ -1371,6 +1373,7 @@ ep_rt_utf8_string_replace (
return false;
}


static
ep_char16_t *
ep_rt_utf8_to_utf16le_string (
Expand All @@ -1382,22 +1385,36 @@ ep_rt_utf8_to_utf16le_string (
if (!str)
return NULL;

// Shipping criteria: no EVENTPIPE-NATIVEAOT-TODO left in the codebase
// Implementation would just use strlen and malloc to make a new buffer, and would then copy the string chars one by one.
// Assumes that only ASCII is used for ep_char8_t
size_t len_utf8 = strlen(str);
ep_char16_t *str_utf16 = reinterpret_cast<ep_char16_t *>(malloc ((len_utf8 + 1) * sizeof (ep_char16_t)));
if (!str_utf16)
if (len == 0) {
// Return an empty string if the length is 0
CHAR16_T * lpDestEmptyStr = reinterpret_cast<CHAR16_T *>(malloc(1 * sizeof(CHAR16_T)));
if(lpDestEmptyStr==NULL) {
return NULL;
}
*lpDestEmptyStr = '\0';
return reinterpret_cast<ep_char16_t*>(lpDestEmptyStr);
}

if (len == (size_t) -1) {
// Length not supplied: measure the string and count one extra character (the NUL terminator), following the pattern used in the EventPipe library
len = strlen(str) + 1;
}

int32_t flags = MINIPAL_MB_NO_REPLACE_INVALID_CHARS | MINIPAL_TREAT_AS_LITTLE_ENDIAN;

size_t ret = minipal_get_length_utf8_to_utf16 (str, len, flags);

if (ret <= 0)
return NULL;

for (size_t i = 0; i < len_utf8; i++)
{
EP_ASSERT(isascii(str[i]));
str_utf16[i] = str[i];
CHAR16_T * lpDestStr = reinterpret_cast<CHAR16_T *>(malloc((ret + 1) * sizeof(CHAR16_T)));
if(lpDestStr==NULL) {
return NULL;
}
ret = minipal_convert_utf8_to_utf16 (str, len, lpDestStr, ret, flags);
lpDestStr[ret] = '\0';

str_utf16[len_utf8] = 0;
return str_utf16;
return reinterpret_cast<ep_char16_t*>(lpDestStr);
}

static
Expand Down Expand Up @@ -1446,27 +1463,37 @@ ep_rt_utf16_to_utf8_string (
size_t len)
{
STATIC_CONTRACT_NOTHROW;

if (!str)
return NULL;

// shipping criteria: no EVENTPIPE-NATIVEAOT-TODO left in the codebase
// Simple implementation to create a utf8 string from a utf16 one
size_t len_utf16 = len;
if(len_utf16 == (size_t)-1)
len_utf16 = ep_rt_utf16_string_len (str);

ep_char8_t *str_utf8 = reinterpret_cast<ep_char8_t *>(malloc ((len_utf16 + 1) * sizeof (ep_char8_t)));
if (!str_utf8)
if (len == 0) {
// Return an empty string if the length is 0
char * lpDestEmptyStr = reinterpret_cast<char *>(malloc(1 * sizeof(char)));
if(lpDestEmptyStr==NULL) {
return NULL;
}
*lpDestEmptyStr = '\0';
return reinterpret_cast<ep_char8_t*>(lpDestEmptyStr);
}

if (len == (size_t) -1) {
// Length not supplied: measure the string and count one extra character beyond its length, following the pattern used in the EventPipe library
len = ep_rt_utf16_string_len (str) + 1;
}

size_t ret = minipal_get_length_utf16_to_utf8 (reinterpret_cast<const CHAR16_T *>(str), len, 0);

if (ret <= 0)
return NULL;

for (size_t i = 0; i < len_utf16; i++)
{
str_utf8[i] = (char)str[i];
char* lpDestStr = reinterpret_cast<char *>(malloc((ret + 1) * sizeof(char)));
if(lpDestStr==NULL) {
return NULL;
}
ret = minipal_convert_utf16_to_utf8 (reinterpret_cast<const CHAR16_T*>(str), len, lpDestStr, ret, 0);
lpDestStr[ret] = '\0';

str_utf8[len_utf16] = 0;
return str_utf8;
return reinterpret_cast<ep_char8_t*>(lpDestStr);
}

static
Expand Down
21 changes: 11 additions & 10 deletions src/native/minipal/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,8 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
// Initialize stuff
unsigned char *pSrc = bytes;
unsigned char *pEnd = pSrc + count;
int availableBytes, chc;
size_t availableBytes;
int chc;

// Start by assuming we have as many as count, charCount always includes the adjustment
// for the character being decoded
Expand Down Expand Up @@ -532,7 +533,7 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun

EncodeChar:

availableBytes = pEnd - pSrc;
availableBytes = (size_t)(pEnd - pSrc);

// don't fall into the fast decoding loop if we don't have enough bytes
if (availableBytes <= 13)
Expand Down Expand Up @@ -749,7 +750,7 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
return 0; \
}

static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount, CHAR16_T* chars, size_t charCount)
static size_t GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount, CHAR16_T* chars, size_t charCount)
{
assert(chars != NULL);
assert(byteCount >= 0);
Expand Down Expand Up @@ -982,8 +983,8 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
*pTarget = (CHAR16_T)ch;
ENSURE_BUFFER_INC

int availableChars = pAllocatedBufferEnd - pTarget;
int availableBytes = pEnd - pSrc;
size_t availableChars = (size_t)(pAllocatedBufferEnd - pTarget);
size_t availableBytes = (size_t)(pEnd - pSrc);

// don't fall into the fast decoding loop if we don't have enough bytes
// Test for availableChars is done because pStop would be <= pTarget.
Expand Down Expand Up @@ -1289,7 +1290,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
return 0;
}

return pTarget - chars;
return (size_t)(pTarget - chars);
}

static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, unsigned char* bytes, size_t byteCount)
Expand Down Expand Up @@ -1510,8 +1511,8 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
if (fallbackUsed && (ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder)) != 0)
goto ProcessChar;

int availableChars = pEnd - pSrc;
int availableBytes = pAllocatedBufferEnd - pTarget;
size_t availableChars = (size_t)(pEnd - pSrc);
size_t availableBytes = (size_t)(pAllocatedBufferEnd - pTarget);

// don't fall into the fast decoding loop if we don't have enough characters
// Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
Expand Down Expand Up @@ -1709,7 +1710,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
return 0;
}

return (int)(pTarget - bytes);
return (size_t)(pTarget - bytes);
}

static size_t GetByteCount(UTF8Encoding* self, CHAR16_T *chars, size_t count)
Expand Down Expand Up @@ -1889,7 +1890,7 @@ static size_t GetByteCount(UTF8Encoding* self, CHAR16_T *chars, size_t count)
goto ProcessChar;
}

int availableChars = pEnd - pSrc;
size_t availableChars = (size_t)(pEnd - pSrc);

// don't fall into the fast decoding loop if we don't have enough characters
if (availableChars <= 13)
Expand Down

0 comments on commit c0d7d2d

Please sign in to comment.