From a0c2219de0d042e9978a59e75195eb46d96bc5e2 Mon Sep 17 00:00:00 2001 From: alektron Date: Sat, 10 Aug 2024 15:39:32 +0200 Subject: [PATCH 1/2] =?UTF-8?q?Fix:=20utf8=5Fdecode=20fails=20for=20certai?= =?UTF-8?q?n=2016=20bit=20multibyte=20sequences=20e.g.=20when=20typing=20a?= =?UTF-8?q?=20'=C3=9C';?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/base/base_strings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/base_strings.c b/src/base/base_strings.c index 91a47562f..14a4c27e2 100644 --- a/src/base/base_strings.c +++ b/src/base/base_strings.c @@ -1228,7 +1228,7 @@ utf8_decode(U8 *str, U64 max){ }break; case 2: { - if (2 < max) + if (1 < max) { U8 cont_byte = str[1]; if (utf8_class[cont_byte >> 3] == 0) From e7b2f27a2a44a70d813f9a7b9336b920bdeb54a4 Mon Sep 17 00:00:00 2001 From: alektron Date: Sat, 10 Aug 2024 15:40:45 +0200 Subject: [PATCH 2/2] UI_EventDeltaUnit_Char now accounts for UTF8 multi byte sequence; --- src/ui/ui_core.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index d2b79b831..7010d4e8a 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -175,8 +175,35 @@ ui_single_line_txt_op_from_event(Arena *arena, UI_Event *event, String8 string, default:{}break; case UI_EventDeltaUnit_Char: { - // TODO(rjf): this should account for multi-byte characters in UTF-8... for now, just assume ASCII and - // no-op + if (delta.x > 0) + { + UnicodeDecode decode = utf8_decode(string.str + cursor.column - 1, string.size); + delta.x = decode.inc; + } + else + { + //Backwards check/count for UTF-8 multi byte sequence + U32 numSeqBytes = 0; + for (S64 idx = cursor.column - 2; idx >= 0; idx -= 1) + { + B32 isSeqByte = ExtractBit(string.str[idx], 7) == 1 && ExtractBit(string.str[idx], 6) == 0; + numSeqBytes += isSeqByte; + if (!isSeqByte) + { + if (numSeqBytes > 0) + { + U32 initialByte = ~(string.str[idx]) << 24; + U64 numLeadingBits = clz32(initialByte); + B32 isMultiByteSeq = numLeadingBits == numSeqBytes + 1; + if (isMultiByteSeq) + { + delta.x = -(numSeqBytes + 1); + } + } + break; + } + } + } }break; case UI_EventDeltaUnit_Word: {