From 4533b3c8e2c5ac201332ef0b22813805b86509c5 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@koston.org>
Date: Tue, 15 Oct 2024 21:06:26 -1000
Subject: [PATCH] Use faster Python unicode API for unquoter (#1292)

---
 CHANGES/1292.misc.rst |  1 +
 yarl/_quoting_c.pyx   | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)
 create mode 100644 CHANGES/1292.misc.rst
diff --git a/CHANGES/1292.misc.rst b/CHANGES/1292.misc.rst
new file mode 100644
index 000000000..10ee43a11
--- /dev/null
+++ b/CHANGES/1292.misc.rst
@@ -0,0 +1 @@
+Improved performance of unquoting strings -- by :user:`bdraco`.
diff --git a/yarl/_quoting_c.pyx b/yarl/_quoting_c.pyx
index 9e9f44ec4..4e0dd5b4c 100644
--- a/yarl/_quoting_c.pyx
+++ b/yarl/_quoting_c.pyx
@@ -332,8 +332,10 @@ cdef class _Unquoter:
         return self._do_unquote(<str>val)
 
     cdef str _do_unquote(self, str val):
-        if len(val) == 0:
+        cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
+        if length == 0:
             return val
+
         cdef list ret = []
         cdef char buffer[4]
         cdef Py_ssize_t buflen = 0
@@ -341,14 +343,17 @@ cdef class _Unquoter:
         cdef str unquoted
         cdef Py_UCS4 ch = 0
         cdef Py_ssize_t idx = 0
-        cdef Py_ssize_t length = len(val)
         cdef Py_ssize_t start_pct
-
+        cdef int kind = PyUnicode_KIND(val)
+        cdef const void *data = PyUnicode_DATA(val)
         while idx < length:
-            ch = val[idx]
+            ch = PyUnicode_READ(kind, data, idx)
             idx += 1
             if ch == '%' and idx <= length - 2:
-                ch = _restore_ch(val[idx], val[idx + 1])
+                ch = _restore_ch(
+                    PyUnicode_READ(kind, data, idx),
+                    PyUnicode_READ(kind, data, idx + 1)
+                )
                 if ch != <Py_UCS4>-1:
                     idx += 2
                     assert buflen < 4