Skip to content

Commit c712e8a

Browse files
committed
move surrogate functionality into readkey
1 parent f2fd1db commit c712e8a

File tree

1 file changed

+19
-15
lines changed

1 file changed

+19
-15
lines changed

readchar/_win_read.py

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,32 +6,36 @@
66
def readchar() -> str:
77
"""Reads a single utf8-character from the input stream.
88
Blocks until a character is available."""
9-
# read the first character.
10-
ch = [msvcrt.getwch()]
119

12-
# if the first character indicates a surrogate pair, read the second character.
13-
if 0xD800 <= ord(ch[0]) <= 0xDFFF:
14-
ch.append(msvcrt.getwch())
15-
16-
# combine the characters into a single utf-16 encoded string.
17-
# this prevents the character from being treated as a surrogate pair again.
18-
return "".join(ch).encode("utf-16", errors="surrogatepass").decode("utf-16")
10+
# read a single wide character from the input
11+
return msvcrt.getwch()
1912

2013

2114
def readkey() -> str:
2215
"""Reads the next keypress. If an escaped key is pressed, the full
2316
sequence is read and returned as noted in `_win_key.py`."""
2417

18+
# read first character
2519
ch = readchar()
2620

21+
# keys like CTRL+C should cause an interrupt
2722
if ch in config.INTERRUPT_KEYS:
2823
raise KeyboardInterrupt
2924

30-
# if it is a normal character:
31-
if ch not in "\x00\xe0":
32-
return ch
25+
# parse special multi character keys (see key module)
26+
# https://learn.microsoft.com/cpp/c-runtime-library/reference/getch-getwch#remarks
27+
if ch in "\x00\xe0":
28+
# read the second half
29+
# we always return the 0x00 prefix, this avoids duplications in the key module
30+
ch = "\x00" + readchar()
31+
32+
# parse unicode surrogates
33+
# https://docs.python.org/3/c-api/unicode.html#c.Py_UNICODE_IS_SURROGATE
34+
if "\uD800" <= ch <= "\uDFFF":
35+
ch += readchar()
3336

34-
# if it is a special key, read second half:
35-
ch2 = readchar()
37+
# combine the characters into a single utf-16 encoded string.
38+
# this prevents the character from being treated as a surrogate pair again.
39+
ch = ch.encode("utf-16", errors="surrogatepass").decode("utf-16")
3640

37-
return "\x00" + ch2
41+
return ch

0 commit comments

Comments
 (0)