From 4533b3c8e2c5ac201332ef0b22813805b86509c5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 15 Oct 2024 21:06:26 -1000 Subject: [PATCH] Use faster Python unicode API for unquoter (#1292) --- CHANGES/1292.misc.rst | 1 + yarl/_quoting_c.pyx | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 CHANGES/1292.misc.rst diff --git a/CHANGES/1292.misc.rst b/CHANGES/1292.misc.rst new file mode 100644 index 000000000..10ee43a11 --- /dev/null +++ b/CHANGES/1292.misc.rst @@ -0,0 +1 @@ +Improved performance of unquoting strings -- by :user:`bdraco`. diff --git a/yarl/_quoting_c.pyx b/yarl/_quoting_c.pyx index 9e9f44ec4..4e0dd5b4c 100644 --- a/yarl/_quoting_c.pyx +++ b/yarl/_quoting_c.pyx @@ -332,8 +332,10 @@ cdef class _Unquoter: return self._do_unquote(val) cdef str _do_unquote(self, str val): - if len(val) == 0: + cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val) + if length == 0: return val + cdef list ret = [] cdef char buffer[4] cdef Py_ssize_t buflen = 0 @@ -341,14 +343,17 @@ cdef class _Unquoter: cdef str unquoted cdef Py_UCS4 ch = 0 cdef Py_ssize_t idx = 0 - cdef Py_ssize_t length = len(val) cdef Py_ssize_t start_pct - + cdef int kind = PyUnicode_KIND(val) + cdef const void *data = PyUnicode_DATA(val) while idx < length: - ch = val[idx] + ch = PyUnicode_READ(kind, data, idx) idx += 1 if ch == '%' and idx <= length - 2: - ch = _restore_ch(val[idx], val[idx + 1]) + ch = _restore_ch( + PyUnicode_READ(kind, data, idx), + PyUnicode_READ(kind, data, idx + 1) + ) if ch != -1: idx += 2 assert buflen < 4