Move surrogate-pair handling from readchar into readkey

Cube707 · Cube707 · commit c712e8af88f5 · 2024-08-11T20:38:44.000+02:00
diff --git a/readchar/_win_read.py b/readchar/_win_read.py
@@ -6,32 +6,36 @@
 def readchar() -> str:
     """Reads a single utf8-character from the input stream.
     Blocks until a character is available."""
-    # read the first character.
-    ch = [msvcrt.getwch()]
 
-    # if the first character indicates a surrogate pair, read the second character.
-    if 0xD800 <= ord(ch[0]) <= 0xDFFF:
-        ch.append(msvcrt.getwch())
-
-    # combine the characters into a single utf-16 encoded string.
-    # this prevents the character from being treated as a surrogate pair again.
-    return "".join(ch).encode("utf-16", errors="surrogatepass").decode("utf-16")
+    # read a single wide character from the input
+    return msvcrt.getwch()
 
 
 def readkey() -> str:
     """Reads the next keypress. If an escaped key is pressed, the full
     sequence is read and returned as noted in `_win_key.py`."""
 
+    # read first character
     ch = readchar()
 
+    # keys like CTRL+C should cause an interrupt
     if ch in config.INTERRUPT_KEYS:
         raise KeyboardInterrupt
 
-    # if it is a normal character:
-    if ch not in "\x00\xe0":
-        return ch
+    # parse special multi character keys (see key module)
+    # https://learn.microsoft.com/cpp/c-runtime-library/reference/getch-getwch#remarks
+    if ch in "\x00\xe0":
+        # read the second half
+        # always return the 0x00 prefix; this avoids duplicates in the key module
+        ch = "\x00" + readchar()
+
+    # parse unicode surrogates
+    # https://docs.python.org/3/c-api/unicode.html#c.Py_UNICODE_IS_SURROGATE
+    if "\uD800" <= ch <= "\uDFFF":
+        ch += readchar()
 
-    # if it is a scpeal key, read second half:
-    ch2 = readchar()
+        # merge the two surrogate halves into a single code point by round-tripping
+        # through utf-16; otherwise the string would still hold two lone surrogates.
+        ch = ch.encode("utf-16", errors="surrogatepass").decode("utf-16")
 
-    return "\x00" + ch2
+    return ch