From b33fb2d61a14afb646f86c9c79058920563ba925 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 10:26:28 +0300 Subject: [PATCH 01/13] gh-101703: use `snprintf` instead of `sprintf` --- Modules/_datetimemodule.c | 8 +++++--- Modules/_pickle.c | 2 +- Modules/_ssl.c | 3 ++- Modules/_testcapimodule.c | 3 ++- Modules/getnameinfo.c | 2 +- Modules/socketmodule.c | 4 ++-- Objects/bytesobject.c | 21 ++++++++++++--------- Objects/typeobject.c | 3 ++- Python/specialize.c | 4 ++-- 9 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index eda8c5610ba659..93f950e97f6e34 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1559,11 +1559,13 @@ make_freplacement(PyObject *object) { char freplacement[64]; if (PyTime_Check(object)) - sprintf(freplacement, "%06d", TIME_GET_MICROSECOND(object)); + snprintf(freplacement, sizeof(freplacement), + "%06d", TIME_GET_MICROSECOND(object)); else if (PyDateTime_Check(object)) - sprintf(freplacement, "%06d", DATE_GET_MICROSECOND(object)); + snprintf(freplacement, sizeof(freplacement), + "%06d", DATE_GET_MICROSECOND(object)); else - sprintf(freplacement, "%06d", 0); + snprintf(freplacement, sizeof(freplacement), "%06d", 0); return PyBytes_FromStringAndSize(freplacement, strlen(freplacement)); } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 1b34977806b661..b39bd90ed5324f 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2098,7 +2098,7 @@ save_long(PicklerObject *self, PyObject *obj) } } else { - sprintf(pdata, "%c%ld\n", INT, val); + snprintf(pdata, sizeof(pdata), "%c%ld\n", INT, val); len = strlen(pdata); } if (_Pickler_Write(self, pdata, len) < 0) diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 8f03a846aed089..ddd86abfad0298 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -1324,8 +1324,9 @@ _get_peer_alt_names (_sslmodulestate *state, X509 *certificate) { } else if (name->d.ip->length == 16) { /* PyUnicode_FromFormat() 
does not support %X */ unsigned char *p = name->d.ip->data; - len = sprintf( + len = snprintf( buf, + sizeof(buf), "%X:%X:%X:%X:%X:%X:%X:%X", p[0] << 8 | p[1], p[2] << 8 | p[3], diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3c411fa0d76358..a733246294a321 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1097,7 +1097,8 @@ test_capsule(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef FAIL #define FAIL(x) \ { \ - sprintf(buffer, "%s module: \"%s\" attribute: \"%s\"", \ + snprintf(buffer, sizeof(buffer), \ + "%s module: \"%s\" attribute: \"%s\"", \ x, known->module, known->attribute); \ error = buffer; \ goto exit; \ diff --git a/Modules/getnameinfo.c b/Modules/getnameinfo.c index 335021f79bafea..b98f735f5f8dc5 100644 --- a/Modules/getnameinfo.c +++ b/Modules/getnameinfo.c @@ -139,7 +139,7 @@ getnameinfo(sa, salen, host, hostlen, serv, servlen, flags) if (serv == NULL || servlen == 0) { /* what we should do? */ } else if (flags & NI_NUMERICSERV) { - sprintf(numserv, "%d", ntohs(port)); + snprintf(numserv, sizeof(numserv), "%d", ntohs(port)); if (strlen(numserv) > servlen) return ENI_MEMORY; strcpy(serv, numserv); diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 0a9e46512b157b..9c0f906ad902a6 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -1302,11 +1302,11 @@ makebdaddr(bdaddr_t *bdaddr) octets[i] = ((*bdaddr) >> (8 * i)) & 0xFF; } - sprintf(buf, "%02X:%02X:%02X:%02X:%02X:%02X", + snprintf(buf, sizeof(buf), "%02X:%02X:%02X:%02X:%02X:%02X", octets[5], octets[4], octets[3], octets[2], octets[1], octets[0]); #else - sprintf(buf, "%02X:%02X:%02X:%02X:%02X:%02X", + snprintf(buf, sizeof(buf), "%02X:%02X:%02X:%02X:%02X:%02X", bdaddr->b[5], bdaddr->b[4], bdaddr->b[3], bdaddr->b[2], bdaddr->b[1], bdaddr->b[0]); #endif diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ba2c2e978c6e42..330211d7bfdc1d 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -270,13 
+270,14 @@ PyBytes_FromFormatV(const char *format, va_list vargs) case 'd': if (longflag) { - sprintf(buffer, "%ld", va_arg(vargs, long)); + snprintf(buffer, sizeof(buffer), "%ld", va_arg(vargs, long)); } else if (size_tflag) { - sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t)); + snprintf(buffer, sizeof(buffer), + "%zd", va_arg(vargs, Py_ssize_t)); } else { - sprintf(buffer, "%d", va_arg(vargs, int)); + snprintf(buffer, sizeof(buffer), "%d", va_arg(vargs, int)); } assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); @@ -284,26 +285,28 @@ PyBytes_FromFormatV(const char *format, va_list vargs) case 'u': if (longflag) { - sprintf(buffer, "%lu", va_arg(vargs, unsigned long)); + snprintf(buffer, sizeof(buffer), + "%lu", va_arg(vargs, unsigned long)); } else if (size_tflag) { - sprintf(buffer, "%zu", va_arg(vargs, size_t)); + snprintf(buffer, sizeof(buffer), "%zu", va_arg(vargs, size_t)); } else { - sprintf(buffer, "%u", va_arg(vargs, unsigned int)); + snprintf(buffer, sizeof(buffer), + "%u", va_arg(vargs, unsigned int)); } assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); break; case 'i': - sprintf(buffer, "%i", va_arg(vargs, int)); + snprintf(buffer, sizeof(buffer), "%i", va_arg(vargs, int)); assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); break; case 'x': - sprintf(buffer, "%x", va_arg(vargs, int)); + snprintf(buffer, sizeof(buffer), "%x", va_arg(vargs, int)); assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); break; @@ -329,7 +332,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) } case 'p': - sprintf(buffer, "%p", va_arg(vargs, void*)); + snprintf(buffer, sizeof(buffer), "%p", va_arg(vargs, void*)); assert(strlen(buffer) < sizeof(buffer)); /* %p is ill-defined: ensure leading 0x. 
*/ if (buffer[1] == 'X') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bf6ccdb77a90f0..809db5545741f0 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4730,7 +4730,8 @@ type_traverse(PyTypeObject *type, visitproc visit, void *arg) for heaptypes. */ if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { char msg[200]; - sprintf(msg, "type_traverse() called on non-heap type '%.100s'", + snprintf(msg, sizeof(msg), + "type_traverse() called on non-heap type '%.100s'", type->tp_name); _PyObject_ASSERT_FAILED_MSG((PyObject *)type, msg); } diff --git a/Python/specialize.c b/Python/specialize.c index 908ad6dceb57f3..ce6ebcd85052ea 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -232,7 +232,7 @@ _Py_PrintSpecializationStats(int to_file) hex_name[40] = '\0'; char buf[64]; assert(strlen(dirname) + 40 + strlen(".txt") < 64); - sprintf(buf, "%s%s.txt", dirname, hex_name); + snprintf(buf, sizeof(buf), "%s%s.txt", dirname, hex_name); FILE *fout = fopen(buf, "w"); if (fout) { out = fout; @@ -1084,7 +1084,7 @@ PyObject *descr, DescriptorClassification kind) if (dict) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NOT_MANAGED_DICT); return 0; - } + } assert(owner_cls->tp_dictoffset > 0); assert(owner_cls->tp_dictoffset <= INT16_MAX); _py_set_opcode(instr, LOAD_ATTR_METHOD_LAZY_DICT); From 8fb350d756d6b204c898155da60fe8a27ad2f6f8 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 11:33:42 +0300 Subject: [PATCH 02/13] Use `PyOS_snprintf` --- Modules/_datetimemodule.c | 10 +++++----- Modules/_pickle.c | 2 +- Modules/_ssl.c | 2 +- Modules/_testcapimodule.c | 2 +- Modules/getnameinfo.c | 2 +- Modules/socketmodule.c | 4 ++-- Objects/bytesobject.c | 26 ++++++++++++++------------ Objects/typeobject.c | 6 +++--- Python/specialize.c | 2 +- 9 files changed, 29 insertions(+), 27 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 93f950e97f6e34..ec95ca3e5d6f0e 100644 --- a/Modules/_datetimemodule.c +++ 
b/Modules/_datetimemodule.c @@ -1559,13 +1559,13 @@ make_freplacement(PyObject *object) { char freplacement[64]; if (PyTime_Check(object)) - snprintf(freplacement, sizeof(freplacement), - "%06d", TIME_GET_MICROSECOND(object)); + PyOS_snprintf(freplacement, sizeof(freplacement), + "%06d", TIME_GET_MICROSECOND(object)); else if (PyDateTime_Check(object)) - snprintf(freplacement, sizeof(freplacement), - "%06d", DATE_GET_MICROSECOND(object)); + PyOS_snprintf(freplacement, sizeof(freplacement), + "%06d", DATE_GET_MICROSECOND(object)); else - snprintf(freplacement, sizeof(freplacement), "%06d", 0); + PyOS_snprintf(freplacement, sizeof(freplacement), "%06d", 0); return PyBytes_FromStringAndSize(freplacement, strlen(freplacement)); } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index b39bd90ed5324f..38b2d4e68988fe 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2098,7 +2098,7 @@ save_long(PicklerObject *self, PyObject *obj) } } else { - snprintf(pdata, sizeof(pdata), "%c%ld\n", INT, val); + PyOS_snprintf(pdata, sizeof(pdata), "%c%ld\n", INT, val); len = strlen(pdata); } if (_Pickler_Write(self, pdata, len) < 0) diff --git a/Modules/_ssl.c b/Modules/_ssl.c index ddd86abfad0298..09cf8b390dce26 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -1324,7 +1324,7 @@ _get_peer_alt_names (_sslmodulestate *state, X509 *certificate) { } else if (name->d.ip->length == 16) { /* PyUnicode_FromFormat() does not support %X */ unsigned char *p = name->d.ip->data; - len = snprintf( + len = PyOS_snprintf( buf, sizeof(buf), "%X:%X:%X:%X:%X:%X:%X:%X", diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index a733246294a321..5eb034a971e325 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1097,7 +1097,7 @@ test_capsule(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef FAIL #define FAIL(x) \ { \ - snprintf(buffer, sizeof(buffer), \ + PyOS_snprintf(buffer, sizeof(buffer), \ "%s module: \"%s\" attribute: \"%s\"", \ x, known->module, 
known->attribute); \ error = buffer; \ diff --git a/Modules/getnameinfo.c b/Modules/getnameinfo.c index b98f735f5f8dc5..3e26d556191fbe 100644 --- a/Modules/getnameinfo.c +++ b/Modules/getnameinfo.c @@ -139,7 +139,7 @@ getnameinfo(sa, salen, host, hostlen, serv, servlen, flags) if (serv == NULL || servlen == 0) { /* what we should do? */ } else if (flags & NI_NUMERICSERV) { - snprintf(numserv, sizeof(numserv), "%d", ntohs(port)); + PyOS_snprintf(numserv, sizeof(numserv), "%d", ntohs(port)); if (strlen(numserv) > servlen) return ENI_MEMORY; strcpy(serv, numserv); diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 9c0f906ad902a6..8eb7dd0983f800 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -1302,11 +1302,11 @@ makebdaddr(bdaddr_t *bdaddr) octets[i] = ((*bdaddr) >> (8 * i)) & 0xFF; } - snprintf(buf, sizeof(buf), "%02X:%02X:%02X:%02X:%02X:%02X", + PyOS_snprintf(buf, sizeof(buf), "%02X:%02X:%02X:%02X:%02X:%02X", octets[5], octets[4], octets[3], octets[2], octets[1], octets[0]); #else - snprintf(buf, sizeof(buf), "%02X:%02X:%02X:%02X:%02X:%02X", + PyOS_snprintf(buf, sizeof(buf), "%02X:%02X:%02X:%02X:%02X:%02X", bdaddr->b[5], bdaddr->b[4], bdaddr->b[3], bdaddr->b[2], bdaddr->b[1], bdaddr->b[0]); #endif diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 330211d7bfdc1d..c84efe3e63d7e5 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -270,14 +270,15 @@ PyBytes_FromFormatV(const char *format, va_list vargs) case 'd': if (longflag) { - snprintf(buffer, sizeof(buffer), "%ld", va_arg(vargs, long)); + PyOS_snprintf(buffer, sizeof(buffer), + "%ld", va_arg(vargs, long)); } else if (size_tflag) { - snprintf(buffer, sizeof(buffer), - "%zd", va_arg(vargs, Py_ssize_t)); + PyOS_snprintf(buffer, sizeof(buffer), + "%zd", va_arg(vargs, Py_ssize_t)); } else { - snprintf(buffer, sizeof(buffer), "%d", va_arg(vargs, int)); + PyOS_snprintf(buffer, sizeof(buffer), "%d", va_arg(vargs, int)); } assert(strlen(buffer) < 
sizeof(buffer)); WRITE_BYTES(buffer); @@ -285,28 +286,29 @@ PyBytes_FromFormatV(const char *format, va_list vargs) case 'u': if (longflag) { - snprintf(buffer, sizeof(buffer), - "%lu", va_arg(vargs, unsigned long)); + PyOS_snprintf(buffer, sizeof(buffer), + "%lu", va_arg(vargs, unsigned long)); } else if (size_tflag) { - snprintf(buffer, sizeof(buffer), "%zu", va_arg(vargs, size_t)); + PyOS_snprintf(buffer, sizeof(buffer), + "%zu", va_arg(vargs, size_t)); } else { - snprintf(buffer, sizeof(buffer), - "%u", va_arg(vargs, unsigned int)); + PyOS_snprintf(buffer, sizeof(buffer), + "%u", va_arg(vargs, unsigned int)); } assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); break; case 'i': - snprintf(buffer, sizeof(buffer), "%i", va_arg(vargs, int)); + PyOS_snprintf(buffer, sizeof(buffer), "%i", va_arg(vargs, int)); assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); break; case 'x': - snprintf(buffer, sizeof(buffer), "%x", va_arg(vargs, int)); + PyOS_snprintf(buffer, sizeof(buffer), "%x", va_arg(vargs, int)); assert(strlen(buffer) < sizeof(buffer)); WRITE_BYTES(buffer); break; @@ -332,7 +334,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) } case 'p': - snprintf(buffer, sizeof(buffer), "%p", va_arg(vargs, void*)); + PyOS_snprintf(buffer, sizeof(buffer), "%p", va_arg(vargs, void*)); assert(strlen(buffer) < sizeof(buffer)); /* %p is ill-defined: ensure leading 0x. */ if (buffer[1] == 'X') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 809db5545741f0..e47fa0b220ab3b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4730,9 +4730,9 @@ type_traverse(PyTypeObject *type, visitproc visit, void *arg) for heaptypes. 
*/ if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { char msg[200]; - snprintf(msg, sizeof(msg), - "type_traverse() called on non-heap type '%.100s'", - type->tp_name); + PyOS_snprintf(msg, sizeof(msg), + "type_traverse() called on non-heap type '%.100s'", + type->tp_name); _PyObject_ASSERT_FAILED_MSG((PyObject *)type, msg); } diff --git a/Python/specialize.c b/Python/specialize.c index ce6ebcd85052ea..9691483a67f56a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -232,7 +232,7 @@ _Py_PrintSpecializationStats(int to_file) hex_name[40] = '\0'; char buf[64]; assert(strlen(dirname) + 40 + strlen(".txt") < 64); - snprintf(buf, sizeof(buf), "%s%s.txt", dirname, hex_name); + PyOS_snprintf(buf, sizeof(buf), "%s%s.txt", dirname, hex_name); FILE *fout = fopen(buf, "w"); if (fout) { out = fout; From 61eee4d59701b42270c79514ba1c2a04cb8fb4ef Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 11:53:22 +0300 Subject: [PATCH 03/13] Convert more constant sized buffers --- Modules/_ctypes/_ctypes.c | 8 ++++---- Modules/unicodedata.c | 2 +- Objects/unicodeobject.c | 33 ++++++++++++++++++++++----------- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 8690f2c1b07852..e0b157ce7cf2d3 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -392,9 +392,9 @@ _ctypes_alloc_format_string_with_shape(int ndim, const Py_ssize_t *shape, strcat(new_prefix, "("); for (k = 0; k < ndim; ++k) { if (k < ndim-1) { - sprintf(buf, "%zd,", shape[k]); + PyOS_snprintf(buf, sizeof(buf), "%zd,", shape[k]); } else { - sprintf(buf, "%zd)", shape[k]); + PyOS_snprintf(buf, sizeof(buf), "%zd)", shape[k]); } strcat(new_prefix, buf); } @@ -4851,10 +4851,10 @@ PyCArrayType_from_ctype(PyObject *itemtype, Py_ssize_t length) return NULL; } #ifdef MS_WIN64 - sprintf(name, "%.200s_Array_%Id", + PyOS_snprintf(name, sizeof(name), "%.200s_Array_%Id", ((PyTypeObject *)itemtype)->tp_name, length); #else - 
sprintf(name, "%.200s_Array_%ld", + PyOS_snprintf(name, sizeof(name), "%.200s_Array_%ld", ((PyTypeObject *)itemtype)->tp_name, (long)length); #endif diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index c108f14871f946..0c86385e0facb6 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1108,7 +1108,7 @@ _getucname(PyObject *self, if (buflen < 28) /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */ return 0; - sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code); + PyOS_snprintf(buffer, buflen, "CJK UNIFIED IDEOGRAPH-%X", code); return 1; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f0c7aa7707fdb5..133ff01bd3ce5d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2455,33 +2455,42 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, if (*f == 'u') { if (longflag) { - len = sprintf(buffer, "%lu", va_arg(*vargs, unsigned long)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%lu", va_arg(*vargs, unsigned long)); } else if (longlongflag) { - len = sprintf(buffer, "%llu", va_arg(*vargs, unsigned long long)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%llu", va_arg(*vargs, unsigned long long)); } else if (size_tflag) { - len = sprintf(buffer, "%zu", va_arg(*vargs, size_t)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%zu", va_arg(*vargs, size_t)); } else { - len = sprintf(buffer, "%u", va_arg(*vargs, unsigned int)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%u", va_arg(*vargs, unsigned int)); } } else if (*f == 'x') { - len = sprintf(buffer, "%x", va_arg(*vargs, int)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%x", va_arg(*vargs, int)); } else { if (longflag) { - len = sprintf(buffer, "%li", va_arg(*vargs, long)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%li", va_arg(*vargs, long)); } else if (longlongflag) { - len = sprintf(buffer, "%lli", va_arg(*vargs, long long)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%lli", va_arg(*vargs, long long)); } else if (size_tflag) { - 
len = sprintf(buffer, "%zi", va_arg(*vargs, Py_ssize_t)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%zi", va_arg(*vargs, Py_ssize_t)); } else { - len = sprintf(buffer, "%i", va_arg(*vargs, int)); + len = PyOS_snprintf(buffer, sizeof(buffer), + "%i", va_arg(*vargs, int)); } } assert(len >= 0); @@ -2530,7 +2539,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, { char number[MAX_LONG_LONG_CHARS]; - len = sprintf(number, "%p", va_arg(*vargs, void*)); + len = PyOS_snprintf(number, sizeof(number), + "%p", va_arg(*vargs, void*)); assert(len >= 0); /* %p is ill-defined: ensure leading 0x. */ @@ -8104,7 +8114,8 @@ charmap_encoding_error( for (collpos = collstartpos; collpos < collendpos; ++collpos) { char buffer[2+29+1+1]; char *cp; - sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos)); + PyOS_snprintf(buffer, sizeof(buffer), + "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos)); for (cp = buffer; *cp; ++cp) { x = charmapencode_output(*cp, mapping, res, respos); if (x==enc_EXCEPTION) From ade334c5256bca3d81f16d15567dca65cbf229bf Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 12:30:33 +0300 Subject: [PATCH 04/13] Convert ones using direct allocation --- Modules/_ctypes/_ctypes.c | 5 +++-- Modules/_ctypes/stgdict.c | 2 +- Programs/_freeze_module.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index e0b157ce7cf2d3..70386e0347c17a 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3360,11 +3360,12 @@ static PPROC FindAddress(void *handle, const char *name, PyObject *type) funcname -> _funcname@<n> where n is 0, 4, 8, 12, ..., 128 */ - mangled_name = alloca(strlen(name) + 1 + 1 + 1 + 3); /* \0 _ @ %d */ + int toalloc = strlen(name) + 1 + 1 + 1 + 3; /* \0 _ @ %d */ + mangled_name = alloca(toalloc); if (!mangled_name) return NULL; for (i = 0; i < 32; ++i) { - sprintf(mangled_name, "_%s@%d", name, i*4); + PyOS_snprintf(mangled_name, toalloc, 
"_%s@%d", name, i*4); Py_BEGIN_ALLOW_THREADS address = (PPROC)GetProcAddress(handle, mangled_name); Py_END_ALLOW_THREADS diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index 83a52757d60979..cdcfe74473567f 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -612,7 +612,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct PyErr_NoMemory(); return -1; } - sprintf(buf, "%s:%s:", fieldfmt, fieldname); + PyOS_snprintf(buf, len + 2 + 1, "%s:%s:", fieldfmt, fieldname); ptr = stgdict->format; if (dict->shape != NULL) { diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c index 90fc2dc6e87da8..40ba49d2bdea9f 100644 --- a/Programs/_freeze_module.c +++ b/Programs/_freeze_module.c @@ -124,7 +124,7 @@ static PyObject * compile_and_marshal(const char *name, const char *text) { char *filename = (char *) malloc(strlen(name) + 10); - sprintf(filename, "<frozen %s>", name); + PyOS_snprintf(filename, strlen(name) + 10, "<frozen %s>", name); PyObject *code = Py_CompileStringExFlags(text, filename, Py_file_input, NULL, 0); free(filename); From 9bc086438d2aabb40a1c34c8b153ff5b25fbb72c Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 13:49:00 +0300 Subject: [PATCH 05/13] Finish! 
--- Modules/_ctypes/_ctypes.c | 6 ++++-- Objects/unicodeobject.c | 3 ++- Parser/string_parser.c | 2 +- Python/pystrtod.c | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 70386e0347c17a..cfa037a514fe98 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -2639,7 +2639,8 @@ unique_key(CDataObject *target, Py_ssize_t index) size_t bytes_left; Py_BUILD_ASSERT(sizeof(string) - 1 > sizeof(Py_ssize_t) * 2); - cp += sprintf(cp, "%x", Py_SAFE_DOWNCAST(index, Py_ssize_t, int)); + cp += PyOS_snprintf(cp, sizeof(Py_ssize_t), "%x", + Py_SAFE_DOWNCAST(index, Py_ssize_t, int)); while (target->b_base) { bytes_left = sizeof(string) - (cp - string) - 1; /* Hex format needs 2 characters per byte */ @@ -2648,7 +2649,8 @@ unique_key(CDataObject *target, Py_ssize_t index) "ctypes object structure too deep"); return NULL; } - cp += sprintf(cp, ":%x", Py_SAFE_DOWNCAST(target->b_index, Py_ssize_t, int)); + cp += PyOS_snprintf(cp, bytes_left, ":%x", + Py_SAFE_DOWNCAST(target->b_index, Py_ssize_t, int)); target = target->b_base; } return PyUnicode_FromStringAndSize(string, cp-string); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 133ff01bd3ce5d..b6bb8e07b6ce65 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -733,7 +733,8 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { - size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + size = PyOS_snprintf(str, 10, "&#%d;", + PyUnicode_READ(kind, data, i)); if (size < 0) { return NULL; } diff --git a/Parser/string_parser.c b/Parser/string_parser.c index c096bea7426e5c..8dac7edf79adb4 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -118,7 +118,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) w_len = PyUnicode_GET_LENGTH(w); for (i = 0; i < w_len; i++) { Py_UCS4 chr = 
PyUnicode_READ(kind, data, i); - sprintf(p, "\\U%08x", chr); + PyOS_snprintf(p, 10, "\\U%08x", chr); p += 10; } /* Should be impossible to overflow */ diff --git a/Python/pystrtod.c b/Python/pystrtod.c index d77b846f0403f0..9e07829dc5865e 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -1239,7 +1239,7 @@ format_float_short(double d, char format_code, /* Now that we've done zero padding, add an exponent if needed. */ if (use_exp) { *p++ = float_strings[OFS_E][0]; - exp_len = sprintf(p, "%+.02d", exp); + exp_len = PyOS_snprintf(p, 5, "%+.02d", exp); // see `bufsize` comments p += exp_len; } exit: From ca2acae5e24f285faf7372d8b9ad37258e089ff4 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 14:44:31 +0300 Subject: [PATCH 06/13] Fix CI --- Objects/unicodeobject.c | 2 +- Parser/string_parser.c | 2 +- Python/pystrtod.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b6bb8e07b6ce65..4d30f135bb028f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -733,7 +733,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { - size = PyOS_snprintf(str, 10, "&#%d;", + size = PyOS_snprintf(str, 10 + 1, "&#%d;", // see `incr` above PyUnicode_READ(kind, data, i)); if (size < 0) { return NULL; diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 8dac7edf79adb4..104b5e7b3397db 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -118,7 +118,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) w_len = PyUnicode_GET_LENGTH(w); for (i = 0; i < w_len; i++) { Py_UCS4 chr = PyUnicode_READ(kind, data, i); - PyOS_snprintf(p, 10, "\\U%08x", chr); + PyOS_snprintf(p, 10 + 1, "\\U%08x", chr); p += 10; } /* Should be impossible to overflow */ diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 9e07829dc5865e..92a410accb287a 100644 --- 
a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -1239,7 +1239,8 @@ format_float_short(double d, char format_code, /* Now that we've done zero padding, add an exponent if needed. */ if (use_exp) { *p++ = float_strings[OFS_E][0]; - exp_len = PyOS_snprintf(p, 5, "%+.02d", exp); // see `bufsize` comments + /* See `bufsize` comments for the max size. */ + exp_len = PyOS_snprintf(p, 5 + 1, "%+.02d", exp); p += exp_len; } exit: From dff1e731ddad99c7b92ba60798050e8194adeced Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 15:39:51 +0300 Subject: [PATCH 07/13] Bisect errors to a single place --- Objects/unicodeobject.c | 3 +-- Parser/string_parser.c | 2 +- Python/pystrtod.c | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4d30f135bb028f..133ff01bd3ce5d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -733,8 +733,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { - size = PyOS_snprintf(str, 10 + 1, "&#%d;", // see `incr` above - PyUnicode_READ(kind, data, i)); + size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); if (size < 0) { return NULL; } diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 104b5e7b3397db..c096bea7426e5c 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -118,7 +118,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) w_len = PyUnicode_GET_LENGTH(w); for (i = 0; i < w_len; i++) { Py_UCS4 chr = PyUnicode_READ(kind, data, i); - PyOS_snprintf(p, 10 + 1, "\\U%08x", chr); + sprintf(p, "\\U%08x", chr); p += 10; } /* Should be impossible to overflow */ diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 92a410accb287a..d77b846f0403f0 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -1239,8 +1239,7 @@ format_float_short(double d, char format_code, /* Now that we've done zero padding, add an 
exponent if needed. */ if (use_exp) { *p++ = float_strings[OFS_E][0]; - /* See `bufsize` comments for the max size. */ - exp_len = PyOS_snprintf(p, 5 + 1, "%+.02d", exp); + exp_len = sprintf(p, "%+.02d", exp); p += exp_len; } exit: From 6953f3081cde76299f4b82220aff0b6acf87f885 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 16:11:50 +0300 Subject: [PATCH 08/13] Fix Windows x86 --- Modules/_ctypes/_ctypes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index cfa037a514fe98..f6167bbf92c176 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -2639,7 +2639,7 @@ unique_key(CDataObject *target, Py_ssize_t index) size_t bytes_left; Py_BUILD_ASSERT(sizeof(string) - 1 > sizeof(Py_ssize_t) * 2); - cp += PyOS_snprintf(cp, sizeof(Py_ssize_t), "%x", + cp += PyOS_snprintf(cp, sizeof(string), "%x", Py_SAFE_DOWNCAST(index, Py_ssize_t, int)); while (target->b_base) { bytes_left = sizeof(string) - (cp - string) - 1; From 6c60488cf2169fb7c9ce99f4279d61c1756e3155 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 17:08:31 +0300 Subject: [PATCH 09/13] Convert `unicodeobject.c` --- Objects/unicodeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 133ff01bd3ce5d..f5444c6a7cbc3d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -733,7 +733,8 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { - size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + size = PyOS_snprintf(str, 10 + 1, // see `incr` for the size + "&#%d;", PyUnicode_READ(kind, data, i)); if (size < 0) { return NULL; } From cc74727b7f843fcbd8c19466633e863473731019 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 19:00:15 +0300 Subject: [PATCH 10/13] Fix `unicodeobject.c` --- Objects/unicodeobject.c | 52 
++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f5444c6a7cbc3d..5151ee0e039c0e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -683,13 +683,38 @@ backslashreplace(_PyBytesWriter *writer, char *str, return str; } +static Py_ssize_t +xmlcharrefreplace_get_incr(Py_UCS4 ch) +{ + Py_ssize_t incr; + + if (ch < 10) + incr = 2+1+1; + else if (ch < 100) + incr = 2+2+1; + else if (ch < 1000) + incr = 2+3+1; + else if (ch < 10000) + incr = 2+4+1; + else if (ch < 100000) + incr = 2+5+1; + else if (ch < 1000000) + incr = 2+6+1; + else { + assert(ch <= MAX_UNICODE); + incr = 2+7+1; + } + + return incr; +} + /* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings: ASCII, Latin1, UTF-8, etc. */ static char* xmlcharrefreplace(_PyBytesWriter *writer, char *str, PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend) { - Py_ssize_t size, i; + Py_ssize_t size, incr, i; Py_UCS4 ch; int kind; const void *data; @@ -700,25 +725,9 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, size = 0; /* determine replacement size */ for (i = collstart; i < collend; ++i) { - Py_ssize_t incr; - ch = PyUnicode_READ(kind, data, i); - if (ch < 10) - incr = 2+1+1; - else if (ch < 100) - incr = 2+2+1; - else if (ch < 1000) - incr = 2+3+1; - else if (ch < 10000) - incr = 2+4+1; - else if (ch < 100000) - incr = 2+5+1; - else if (ch < 1000000) - incr = 2+6+1; - else { - assert(ch <= MAX_UNICODE); - incr = 2+7+1; - } + incr = xmlcharrefreplace_get_incr(ch); + if (size > PY_SSIZE_T_MAX - incr) { PyErr_SetString(PyExc_OverflowError, "encoded result is too long for a Python string"); @@ -733,8 +742,9 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { - size = PyOS_snprintf(str, 10 + 1, // see `incr` for the size - "&#%d;", PyUnicode_READ(kind, data, i)); + ch = 
PyUnicode_READ(kind, data, i); + incr = xmlcharrefreplace_get_incr(ch); + size = PyOS_snprintf(str, incr + 1, "&#%d;", ch); if (size < 0) { return NULL; } From 95d6c5232e90d6f97581ed3fa95098654027040b Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 19:37:43 +0300 Subject: [PATCH 11/13] Convert `string_parser.c` --- Parser/string_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parser/string_parser.c b/Parser/string_parser.c index c096bea7426e5c..104b5e7b3397db 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -118,7 +118,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) w_len = PyUnicode_GET_LENGTH(w); for (i = 0; i < w_len; i++) { Py_UCS4 chr = PyUnicode_READ(kind, data, i); - sprintf(p, "\\U%08x", chr); + PyOS_snprintf(p, 10 + 1, "\\U%08x", chr); p += 10; } /* Should be impossible to overflow */ From c7dd34886e67838edb0124b8f77b824525697e28 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 9 Feb 2023 20:20:42 +0300 Subject: [PATCH 12/13] Convert `pystrtod.c` --- Python/pystrtod.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/pystrtod.c b/Python/pystrtod.c index d77b846f0403f0..3fd8d3c8336bae 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -1239,7 +1239,8 @@ format_float_short(double d, char format_code, /* Now that we've done zero padding, add an exponent if needed. */ if (use_exp) { *p++ = float_strings[OFS_E][0]; - exp_len = sprintf(p, "%+.02d", exp); + /* See `bufsize` comments for the size argument. 
*/ + exp_len = PyOS_snprintf(p, 5, "%+.02d", exp); p += exp_len; } exit: From ae812b65842e98467e03d119f74bee6f3a51b643 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 10 Feb 2023 17:10:31 +0300 Subject: [PATCH 13/13] Address review comments --- Objects/unicodeobject.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5151ee0e039c0e..4f36c24d71517f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -684,25 +684,32 @@ backslashreplace(_PyBytesWriter *writer, char *str, } static Py_ssize_t -xmlcharrefreplace_get_incr(Py_UCS4 ch) +get_xmlcharref_length(Py_UCS4 ch) { Py_ssize_t incr; - if (ch < 10) - incr = 2+1+1; - else if (ch < 100) - incr = 2+2+1; - else if (ch < 1000) - incr = 2+3+1; - else if (ch < 10000) - incr = 2+4+1; - else if (ch < 100000) - incr = 2+5+1; - else if (ch < 1000000) - incr = 2+6+1; + /* `2 + 1` part is `&#` + `;` */ + if (ch < 10) { + incr = 2 + 1 + 1; + } + else if (ch < 100) { + incr = 2 + 2 + 1; + } + else if (ch < 1000) { + incr = 2 + 3 + 1; + } + else if (ch < 10000) { + incr = 2 + 4 + 1; + } + else if (ch < 100000) { + incr = 2 + 5 + 1; + } + else if (ch < 1000000) { + incr = 2 + 6 + 1; + } else { assert(ch <= MAX_UNICODE); - incr = 2+7+1; + incr = 2 + 7 + 1; } return incr; @@ -726,7 +733,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* determine replacement size */ for (i = collstart; i < collend; ++i) { ch = PyUnicode_READ(kind, data, i); - incr = xmlcharrefreplace_get_incr(ch); + incr = get_xmlcharref_length(ch); if (size > PY_SSIZE_T_MAX - incr) { PyErr_SetString(PyExc_OverflowError, @@ -743,7 +750,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, /* generate replacement */ for (i = collstart; i < collend; ++i) { ch = PyUnicode_READ(kind, data, i); - incr = xmlcharrefreplace_get_incr(ch); + incr = get_xmlcharref_length(ch); size = PyOS_snprintf(str, incr + 1, "&#%d;", ch); if (size < 
0) { return NULL;