From a3caf1b18c710d0cb1ca20dba3404ef87328e3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 10 Apr 2025 13:43:48 +0200 Subject: [PATCH 1/3] handle overflows for 'K' and 'u' formats in `do_mkvalue` --- Doc/c-api/arg.rst | 2 + ...-04-25-11-39-24.gh-issue-132909.JC3n_l.rst | 3 + Python/modsupport.c | 71 ++++++++++++------- 3 files changed, 52 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 81b093a3510914..f933888d627e38 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -626,6 +626,7 @@ Building values ``z#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Same as ``s#``. + .. _capi-py-buildvalue-format-u: ``u`` (:class:`str`) [const wchar_t \*] Convert a null-terminated :c:type:`wchar_t` buffer of Unicode (UTF-16 or UCS-4) data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``, @@ -669,6 +670,7 @@ Building values ``L`` (:class:`int`) [long long] Convert a C :c:expr:`long long` to a Python integer object. + .. _capi-py-buildvalue-format-K: ``K`` (:class:`int`) [unsigned long long] Convert a C :c:expr:`unsigned long long` to a Python integer object. diff --git a/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst b/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst new file mode 100644 index 00000000000000..93dcaa49d83151 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst @@ -0,0 +1,3 @@ +Fix overflows when handling :ref:`K ` and +:ref:`u ` formats in :c:func:`Py_BuildValue`. +Patch by Bénédikt Tran. diff --git a/Python/modsupport.c b/Python/modsupport.c index 501231affe8cd4..85d5c399f099fe 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -268,6 +268,30 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) return v; } +static Py_ssize_t +safe_strlen(const char *u) +{ + size_t m = strlen(u); + if (m > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "string too long for Python string"); + return -1; + } + return (Py_ssize_t)m; +} + +static Py_ssize_t +safe_wcslen(const wchar_t *u) +{ + size_t m = wcslen(u); + if (m > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "string too long for Python string"); + return -1; + } + return (Py_ssize_t)m; +} + static PyObject * do_mkvalue(const char **p_format, va_list *p_va) { @@ -318,32 +342,41 @@ do_mkvalue(const char **p_format, va_list *p_va) } case 'L': - return PyLong_FromLongLong((long long)va_arg(*p_va, long long)); + { + long long v = va_arg(*p_va, long long); + return PyLong_FromLongLong(v); + } case 'K': - return PyLong_FromUnsignedLongLong((long long)va_arg(*p_va, unsigned long long)); + { + unsigned long long v = va_arg(*p_va, unsigned long long); + return PyLong_FromUnsignedLongLong(v); + } case 'u': { PyObject *v; - const wchar_t *u = va_arg(*p_va, wchar_t*); - Py_ssize_t n; + const wchar_t *u = va_arg(*p_va, const wchar_t *); + Py_ssize_t n = -1; if (**p_format == '#') { ++*p_format; n = va_arg(*p_va, Py_ssize_t); } - else - n = -1; if (u == NULL) { v = Py_NewRef(Py_None); } else { - if (n < 0) - n = wcslen(u); + if (n < 0) { + n = safe_wcslen(u); + if (n < 0) { + return NULL; + } + } v = PyUnicode_FromWideChar(u, n); } return v; } + case 'f': case 'd': return PyFloat_FromDouble( @@ -376,25 +409,20 @@ do_mkvalue(const char **p_format, va_list *p_va) { PyObject *v; const char *str = va_arg(*p_va, const char *); - Py_ssize_t n; + Py_ssize_t n = -1; if (**p_format == '#') { ++*p_format; n = va_arg(*p_va, Py_ssize_t); } - else - n = -1; if (str == NULL) { v = Py_NewRef(Py_None); } else { if (n < 0) { - size_t m = strlen(str); - if (m > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "string too long for Python string"); + n = safe_strlen(str); + if (n < 0) { return NULL; } - n = (Py_ssize_t)m; } v = PyUnicode_FromStringAndSize(str, n); } @@ -405,25 +433,20 @@ do_mkvalue(const char **p_format, va_list *p_va) { PyObject *v; const char *str = va_arg(*p_va, const char *); - Py_ssize_t n; + Py_ssize_t n = -1; if (**p_format == '#') { ++*p_format; n = va_arg(*p_va, Py_ssize_t); } - else - n = -1; if (str == NULL) { v = Py_NewRef(Py_None); } else { if (n < 0) { - size_t m = strlen(str); - if (m > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "string too long for Python bytes"); + n = safe_strlen(str); + if (n < 0) { return NULL; } - n = (Py_ssize_t)m; } v = PyBytes_FromStringAndSize(str, n); } From f74214c061653de8d1797a98a8805fb049c089cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 25 Apr 2025 11:59:37 +0200 Subject: [PATCH 2/3] fix(docs) --- Doc/c-api/arg.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index f933888d627e38..cb1f6c703ef63a 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -627,6 +627,7 @@ Building values Same as ``s#``. .. _capi-py-buildvalue-format-u: + ``u`` (:class:`str`) [const wchar_t \*] Convert a null-terminated :c:type:`wchar_t` buffer of Unicode (UTF-16 or UCS-4) data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``, @@ -671,6 +672,7 @@ Building values Convert a C :c:expr:`long long` to a Python integer object. .. _capi-py-buildvalue-format-K: + ``K`` (:class:`int`) [unsigned long long] Convert a C :c:expr:`unsigned long long` to a Python integer object. From 80fd7f7834c2c84acb2f80955d3546cd9d0baeab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 25 Apr 2025 12:16:08 +0200 Subject: [PATCH 3/3] reduce diff --- Doc/c-api/arg.rst | 2 - ...-04-25-11-39-24.gh-issue-132909.JC3n_l.rst | 5 +- Python/modsupport.c | 72 +++++++------------ 3 files changed, 27 insertions(+), 52 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index cb1f6c703ef63a..d68dc0885681df 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -626,8 +626,6 @@ Building values ``z#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Same as ``s#``. - .. _capi-py-buildvalue-format-u: - ``u`` (:class:`str`) [const wchar_t \*] Convert a null-terminated :c:type:`wchar_t` buffer of Unicode (UTF-16 or UCS-4) data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``, diff --git a/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst b/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst index 93dcaa49d83151..81a37d0595e2e0 100644 --- a/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst +++ b/Misc/NEWS.d/next/C_API/2025-04-25-11-39-24.gh-issue-132909.JC3n_l.rst @@ -1,3 +1,2 @@ -Fix overflows when handling :ref:`K ` and -:ref:`u ` formats in :c:func:`Py_BuildValue`. -Patch by Bénédikt Tran. +Fix an overflow when handling the :ref:`K ` format +in :c:func:`Py_BuildValue`. Patch by Bénédikt Tran. diff --git a/Python/modsupport.c b/Python/modsupport.c index 85d5c399f099fe..2caf595949d52f 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -268,30 +268,6 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) return v; } -static Py_ssize_t -safe_strlen(const char *u) -{ - size_t m = strlen(u); - if (m > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "string too long for Python string"); - return -1; - } - return (Py_ssize_t)m; -} - -static Py_ssize_t -safe_wcslen(const wchar_t *u) -{ - size_t m = wcslen(u); - if (m > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "string too long for Python string"); - return -1; - } - return (Py_ssize_t)m; -} - static PyObject * do_mkvalue(const char **p_format, va_list *p_va) { @@ -342,41 +318,33 @@ do_mkvalue(const char **p_format, va_list *p_va) } case 'L': - { - long long v = va_arg(*p_va, long long); - return PyLong_FromLongLong(v); - } + return PyLong_FromLongLong((long long)va_arg(*p_va, long long)); case 'K': - { - unsigned long long v = va_arg(*p_va, unsigned long long); - return PyLong_FromUnsignedLongLong(v); - } + return PyLong_FromUnsignedLongLong( + va_arg(*p_va, unsigned long long)); case 'u': { PyObject *v; - const wchar_t *u = va_arg(*p_va, const wchar_t *); - Py_ssize_t n = -1; + const wchar_t *u = va_arg(*p_va, wchar_t*); + Py_ssize_t n; if (**p_format == '#') { ++*p_format; n = va_arg(*p_va, Py_ssize_t); } + else + n = -1; if (u == NULL) { v = Py_NewRef(Py_None); } else { - if (n < 0) { - n = safe_wcslen(u); - if (n < 0) { - return NULL; - } - } + if (n < 0) + n = wcslen(u); v = PyUnicode_FromWideChar(u, n); } return v; } - case 'f': case 'd': return PyFloat_FromDouble( @@ -409,20 +377,25 @@ do_mkvalue(const char **p_format, va_list *p_va) { PyObject *v; const char *str = va_arg(*p_va, const char *); - Py_ssize_t n = -1; + Py_ssize_t n; if (**p_format == '#') { ++*p_format; n = va_arg(*p_va, Py_ssize_t); } + else + n = -1; if (str == NULL) { v = Py_NewRef(Py_None); } else { if (n < 0) { - n = safe_strlen(str); - if (n < 0) { + size_t m = strlen(str); + if (m > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "string too long for Python string"); return NULL; } + n = (Py_ssize_t)m; } v = PyUnicode_FromStringAndSize(str, n); } @@ -433,20 +406,25 @@ do_mkvalue(const char **p_format, va_list *p_va) { PyObject *v; const char *str = va_arg(*p_va, const char *); - Py_ssize_t n = -1; + Py_ssize_t n; if (**p_format == '#') { ++*p_format; n = va_arg(*p_va, Py_ssize_t); } + else + n = -1; if (str == NULL) { v = Py_NewRef(Py_None); } else { if (n < 0) { - n = safe_strlen(str); - if (n < 0) { + size_t m = strlen(str); + if (m > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "string too long for Python bytes"); return NULL; } + n = (Py_ssize_t)m; } v = PyBytes_FromStringAndSize(str, n); }