diff --git a/docs/feature_unicode.md b/docs/feature_unicode.md index bd1f4fa5ae05..546af2521a47 100644 --- a/docs/feature_unicode.md +++ b/docs/feature_unicode.md @@ -193,12 +193,24 @@ By default, when the keyboard boots, it will initialize the input mode to the la !> Using `UNICODE_SELECTED_MODES` means you don't have to initially set the input mode in `matrix_init_user()` (or a similar function); the Unicode system will do that for you on startup. This has the added benefit of avoiding unnecessary writes to EEPROM. -## `send_unicode_hex_string` +## `send_unicode_string()` -To type multiple characters for things like (ノಠ痊ಠ)ノ彡┻━┻, you can use `send_unicode_hex_string()` much like `SEND_STRING()` except you would use hex values separate by spaces. -For example, the table flip seen above would be `send_unicode_hex_string("0028 30CE 0CA0 75CA 0CA0 0029 30CE 5F61 253B 2501 253B")` +This function is much like `send_string()` but allows you to input UTF-8 characters directly, currently up to code point U+FFFF. Make sure your `keymap.c` is formatted in UTF-8 encoding. -There are many ways to get a hex code, but an easy one is [this site](https://r12a.github.io/app-conversion/). Just make sure to convert to hexadecimal, and that is your string. +```c +send_unicode_string("(ノಠ痊ಠ)ノ彡┻━┻"); +``` + +## `send_unicode_hex_string()` + +Similar to `send_unicode_string()`, but the characters are represented by their code point values in ASCII, separated by spaces. For example, the table flip above would be achieved with: + +```c +send_unicode_hex_string("0028 30CE 0CA0 75CA 0CA0 0029 30CE 5F61 253B 2501 253B"); +``` + +An easy way to convert your Unicode string to this format is by using [this site](https://r12a.github.io/app-conversion/), and taking the result in the "Hex/UTF-32" section. +Unlike `send_unicode_string()` this function supports code points up to U+10FFFF. 
## Additional Language Support @@ -228,6 +240,6 @@ AutoHotkey inserts the Text right of `Send, ` when this combination is pressed. If you enable the US International layout on the system, it will use punctuation to accent the characters. -For instance, typing "`a" will result in à. +For instance, typing "\`a" will result in à. You can find details on how to enable this [here](https://support.microsoft.com/en-us/help/17424/windows-change-keyboard-layout). diff --git a/quantum/process_keycode/process_unicode_common.c b/quantum/process_keycode/process_unicode_common.c index 94383f19b749..4ac305e66176 100644 --- a/quantum/process_keycode/process_unicode_common.c +++ b/quantum/process_keycode/process_unicode_common.c @@ -178,6 +178,82 @@ void send_unicode_hex_string(const char *str) { } }
+// Decodes the UTF-8 sequence at the start of str, which must be NUL-terminated.
+// Stores the code point in *code_point, or -1 when the sequence is malformed,
+// truncated, overlong, a UTF-16 surrogate, or above U+FFFF. Returns a pointer
+// to the byte following the consumed sequence; the terminating NUL is only
+// ever stepped over when str itself points at it.
+// Based on https://nullprogram.com/blog/2017/10/06/
+const char *decode_utf8(const char *str, int32_t *code_point) {
+    // Plain char may be signed, which would make every byte compare < 0x80
+    const uint8_t *bytes = (const uint8_t *)str;
+    uint8_t        length;    // sequence length announced by the lead byte
+    int32_t        smallest;  // lowest code point that needs this many bytes
+    int32_t        decoded;
+
+    if (bytes[0] < 0x80) {  // U+0000-007F
+        *code_point = bytes[0];
+        return str + 1;
+    } else if ((bytes[0] & 0xE0) == 0xC0) {  // U+0080-07FF
+        length   = 2;
+        smallest = 0x80;
+        decoded  = bytes[0] & 0x1F;
+    } else if ((bytes[0] & 0xF0) == 0xE0) {  // U+0800-FFFF
+        length   = 3;
+        smallest = 0x800;
+        decoded  = bytes[0] & 0x0F;
+    } else if ((bytes[0] & 0xF8) == 0xF0 && bytes[0] <= 0xF4) {  // U+10000-10FFFF
+        length   = 4;
+        smallest = 0x10000;
+        decoded  = bytes[0] & 0x07;
+    } else {  // stray continuation byte or invalid lead byte
+        *code_point = -1;
+        return str + 1;
+    }
+
+    for (uint8_t i = 1; i < length; i++) {
+        if ((bytes[i] & 0xC0) != 0x80) {
+            // Truncated sequence: stop here so the NUL terminator (or the next
+            // lead byte) is left for the caller instead of being skipped
+            *code_point = -1;
+            return str + i;
+        }
+        decoded = (decoded << 6) | (bytes[i] & 0x3F);
+    }
+
+    if (length == 4) {
+        // Skip for now - register_hex() only takes a uint16
+        decoded = -1;
+    } else if (decoded < smallest || (decoded >= 0xD800 && decoded <= 0xDFFF)) {
+        // Overlong encoding, or part of a UTF-16 surrogate pair - invalid
+        decoded = -1;
+    }
+
+    *code_point = decoded;
+    return str + length;
+}
+
+// Sends each character of the NUL-terminated UTF-8 string str, wrapping every
+// code point in unicode_input_start()/unicode_input_finish(). A NULL str is a
+// no-op; sequences that decode_utf8() reports as -1 are skipped.
+void send_unicode_string(const char *str) {
+    if (!str) {
+        return;
+    }
+
+    while (*str) {
+        int32_t code_point = -1;
+        str                = decode_utf8(str, &code_point);
+
+        if (code_point >= 0) {
+            unicode_input_start();
+            // decode_utf8() never yields more than 0xFFFF, so this cannot truncate
+            register_hex((uint16_t)code_point);
+            unicode_input_finish();
+        }
+    }
+}
+
bool process_unicode_common(uint16_t keycode, keyrecord_t *record) { if (record->event.pressed) { switch (keycode) { diff --git a/quantum/process_keycode/process_unicode_common.h b/quantum/process_keycode/process_unicode_common.h index cab6eea6eafd..393db2d99eb7 100644 --- a/quantum/process_keycode/process_unicode_common.h +++ b/quantum/process_keycode/process_unicode_common.h @@ -80,6 +80,7 @@ void unicode_input_cancel(void); void register_hex(uint16_t hex); void send_unicode_hex_string(const char *str); +void send_unicode_string(const char *str); bool process_unicode_common(uint16_t keycode, keyrecord_t *record);