From 19b28fc7c9d35d49fd419375a49e6a60cd8a43f8 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 5 May 2020 23:44:08 +0200 Subject: [PATCH 01/45] CVE-2020-10735: Prevent DoS by very large int() The text to int parser of Python's int type is not safe against malicious input. Very large input strings with hundred thousands of digits can consume several seconds. The int() now limit the maximum amount of an input string to 5,000 digits. For comparison total amount of protons in the observable universe is known as Eddington number. That number has 80 digits. Signed-off-by: Christian Heimes --- Doc/library/functions.rst | 2 + Doc/library/json.rst | 9 +++++ Doc/library/sys.rst | 12 ++++++ Include/cpython/initconfig.h | 4 ++ Include/internal/pycore_interp.h | 2 + Include/longobject.h | 5 +++ Lib/test/test_int.py | 46 +++++++++++++++++++++++ Lib/test/test_json/test_decode.py | 8 ++++ Lib/test/test_xmlrpc.py | 8 ++++ Lib/xmlrpc/client.py | 10 ++++- Objects/longobject.c | 48 +++++++++++++++++++----- Python/clinic/sysmodule.c.h | 61 +++++++++++++++++++++++++++++++ Python/initconfig.c | 53 ++++++++++++++++++++++++++- Python/sysmodule.c | 43 ++++++++++++++++++++++ Tools/scripts/intdoshttpserver.py | 19 ++++++++++ Tools/scripts/intdostimeit.py | 45 +++++++++++++++++++++++ 16 files changed, 364 insertions(+), 11 deletions(-) create mode 100644 Tools/scripts/intdoshttpserver.py create mode 100644 Tools/scripts/intdostimeit.py diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index e86e1857c7a67d..6c335d64971012 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -910,6 +910,8 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.11 The delegation to :meth:`__trunc__` is deprecated. + .. versionchanged:: 3.12 + :class:`int` are now limited, :func:`sys.setintmaxdigits` TODO .. 
function:: isinstance(object, classinfo) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 467d5d9e1544d4..d0228c53ecaf69 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -23,6 +23,11 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. warning:: + Be cautious when parsing JSON data from untrusted sources. A malicious + JSON string may cause the decoder to consume considerable CPU and memory + resources. It's advised to limit the input to a sensible length. + :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. @@ -253,6 +258,10 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). + .. versionchanged:: 3.9 + The default implementation of *parse_int* limits the input string to + 5,000 digits to prevent denial of service attacks. + *parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to raise an exception if invalid JSON numbers diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 43db4baf62dfb3..0654ee1423bad4 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -723,6 +723,12 @@ always available. .. versionadded:: 3.6 +.. function:: getintmaxdigits() + + Return limit for int digits, :func:`setintmaxdigits` TODO + + .. versionadded:: 3.9 + .. function:: getrefcount(object) Return the reference count of the *object*. The count returned is generally one @@ -1308,6 +1314,12 @@ always available. .. availability:: Unix. +.. function:: setintmaxdigits(n) + + Set maximum amount of int digits, :func:`getintmaxdigits` TODO + + .. versionadded:: 3.9 + .. function:: setprofile(profilefunc) .. 
index:: diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index c6057a4c3ed945..1f58c10637a1a7 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -218,6 +218,10 @@ typedef struct PyConfig { // If non-zero, we believe we're running from a source tree. int _is_python_build; + + /* global limit for long digits */ + Py_ssize_t intmaxdigits; + } PyConfig; PyAPI_FUNC(void) PyConfig_InitPythonConfig(PyConfig *config); diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index d71386953a0dd0..c8e8c35790d68b 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -176,6 +176,8 @@ struct _is { struct types_state types; struct callable_cache callable_cache; + Py_ssize_t intmaxdigits; + /* The following fields are here to avoid allocation during init. The data is exposed through PyInterpreterState pointer fields. These fields should not be accessed directly outside of init. diff --git a/Include/longobject.h b/Include/longobject.h index e559e238ae5a35..91d899eef8a4ab 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -71,6 +71,11 @@ PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLong(PyObject *); PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLongMask(PyObject *); PyAPI_FUNC(long long) PyLong_AsLongLongAndOverflow(PyObject *, int *); +/* Default limitation */ +#define _PY_LONG_DEFAULT_MAX_DIGITS 5000 +/* Don't check unless input / output is larger than threshold */ +#define _PY_LONG_MAX_DIGITS_TRESHOLD 1024 + PyAPI_FUNC(PyObject *) PyLong_FromString(const char *, char **, int); /* These aren't really part of the int object, but they're handy. 
The diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index a72699cc7506af..63b98d6fcb065b 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,3 +1,4 @@ +import contextlib import sys import unittest @@ -26,6 +27,17 @@ ("\u0200", ValueError) ] + +@contextlib.contextmanager +def setintmaxdigits(maxdigits): + current = sys.getintmaxdigits() + try: + sys.setintmaxdigits(maxdigits) + yield + finally: + sys.setintmaxdigits(current) + + class IntSubclass(int): pass @@ -576,6 +588,40 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7_8_9', 16), 0x123456789) self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) + def _test_maxdigits(self, c): + maxdigits = sys.getintmaxdigits() + # edge cases + c('1' * maxdigits) + c(' ' + '1' * maxdigits) + c('+' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + + # disable limitation + with setintmaxdigits(0): + c('1' * (maxdigits + 1)) + c('1' * (maxdigits + 1)) + + # OverflowError + def check(i, base=None): + with self.assertRaises(OverflowError): + if base is None: + c(i) + else: + c(i, base) + + with setintmaxdigits(1024): + maxdigits = 1024 + check('1' * (maxdigits + 1)) + check('+' + '1' * (maxdigits + 1)) + check('1' * (maxdigits + 1)) + + i = 10 ** maxdigits + with self.assertRaises(OverflowError): + str(i) + + def test_maxdigits(self): + self._test_maxdigits(int) + self._test_maxdigits(IntSubclass) if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index fdb9e62124ece1..2742bce5f0324d 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -2,6 +2,7 @@ from io import StringIO from collections import OrderedDict from test.test_json import PyTest, CTest +import sys class TestDecode: @@ -95,5 +96,12 @@ def test_negative_index(self): d = self.json.JSONDecoder() self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) + def test_limit_int(self): + 
maxdigits = sys.getintmaxdigits() + self.loads('1' * maxdigits) + with self.assertRaises(OverflowError): + self.loads('1' * (maxdigits + 1)) + + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 6c4601ddaddcbd..6460799ecbf109 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -289,6 +289,14 @@ def test_load_extension_types(self): check('9876543210.0123456789', decimal.Decimal('9876543210.0123456789')) + def test_limit_int(self): + check = self.check_loads + with self.assertRaises(OverflowError): + check('123456780123456789', None) + with self.assertRaises(OverflowError): + s = '1' * (sys.getintmaxdigits() + 1) + check(f'{s}', None) + def test_get_host_info(self): # see bug #3613, this raised a TypeError transp = xmlrpc.client.Transport() diff --git a/Lib/xmlrpc/client.py b/Lib/xmlrpc/client.py index bef23f4505e03c..1ba11b2db9e75d 100644 --- a/Lib/xmlrpc/client.py +++ b/Lib/xmlrpc/client.py @@ -742,14 +742,22 @@ def end_boolean(self, data): dispatch["boolean"] = end_boolean def end_int(self, data): + if len(data.strip()) > 16: + # XML-RPC ints are signed int32 with 11 chars text max + raise OverflowError("int exceeds XML-RPC limits") self.append(int(data)) self._value = 0 + dispatch["i1"] = end_int dispatch["i2"] = end_int dispatch["i4"] = end_int dispatch["i8"] = end_int dispatch["int"] = end_int - dispatch["biginteger"] = end_int + + def end_bigint(self, data): + self.append(int(data)) + self._value = 0 + dispatch["biginteger"] = end_bigint def end_double(self, data): self.append(float(data)) diff --git a/Objects/longobject.c b/Objects/longobject.c index 90ed02b8c27a19..e60f9bb108ac15 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1815,6 +1815,15 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } + if (strlen > _PY_LONG_MAX_DIGITS_TRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); 
+ if ((interp->intmaxdigits > 0) && (strlen > interp->intmaxdigits)) { + Py_DECREF(scratch); + PyErr_SetString(PyExc_OverflowError, + "too many digits in integer"); + return -1; + } + } if (writer) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { Py_DECREF(scratch); @@ -2267,6 +2276,7 @@ long_from_binary_base(const char **str, int base, PyLongObject **res) * * If unsuccessful, NULL will be returned. */ + PyObject * PyLong_FromString(const char *str, char **pend, int base) { @@ -2328,6 +2338,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { + /* binary bases are not limited by intmaxdigits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2479,6 +2490,16 @@ digit beyond the first. goto onError; } + slen = scan - str; + if (slen > _PY_LONG_MAX_DIGITS_TRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + if ((interp->intmaxdigits > 0 ) && (slen > interp->intmaxdigits)) { + PyErr_SetString(PyExc_OverflowError, + "too many digits in integer"); + return NULL; + } + } + /* Create an int object that can contain the largest possible * integer with this base and length. 
Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -5355,18 +5376,20 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) } return PyLong_FromLong(0L); } + /* default base and limit, forward to standard implementation */ if (obase == NULL) return PyNumber_Long(x); - base = PyNumber_AsSsize_t(obase, NULL); - if (base == -1 && PyErr_Occurred()) - return NULL; - if ((base != 0 && base < 2) || base > 36) { - PyErr_SetString(PyExc_ValueError, - "int() base must be >= 2 and <= 36, or 0"); - return NULL; + if (obase != NULL) { + base = PyNumber_AsSsize_t(obase, NULL); + if (base == -1 && PyErr_Occurred()) + return NULL; + if ((base != 0 && base < 2) || base > 36) { + PyErr_SetString(PyExc_ValueError, + "int() base must be >= 2 and <= 36, or 0"); + return NULL; + } } - if (PyUnicode_Check(x)) return PyLong_FromUnicodeObject(x, (int)base); else if (PyByteArray_Check(x) || PyBytes_Check(x)) { @@ -6090,6 +6113,8 @@ internal representation of integers. 
The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_digits", "maximum digits limitation"}, + {"max_digits_threshold", "minimum threshold to check for max digits"}, {NULL, NULL} }; @@ -6097,7 +6122,7 @@ static PyStructSequence_Desc int_info_desc = { "sys.int_info", /* name */ int_info__doc__, /* doc */ int_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -6112,6 +6137,10 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_DIGITS)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_MAX_DIGITS_TRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -6139,6 +6168,7 @@ _PyLong_InitTypes(PyInterpreterState *interp) return _PyStatus_ERR("can't init int info type"); } } + interp->intmaxdigits = _PyInterpreterState_GetConfig(interp)->intmaxdigits; return _PyStatus_OK(); } diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index ddf01a7ccdda08..9983c9539110fe 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -745,6 +745,67 @@ sys_mdebug(PyObject *module, PyObject *arg) #endif /* defined(USE_MALLOPT) */ +PyDoc_STRVAR(sys_getintmaxdigits__doc__, +"getintmaxdigits($module, /)\n" +"--\n" +"\n" +"TODO"); + +#define SYS_GETINTMAXDIGITS_METHODDEF \ + {"getintmaxdigits", (PyCFunction)sys_getintmaxdigits, METH_NOARGS, sys_getintmaxdigits__doc__}, + +static PyObject * +sys_getintmaxdigits_impl(PyObject *module); + +static PyObject * +sys_getintmaxdigits(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys_getintmaxdigits_impl(module); +} + +PyDoc_STRVAR(sys_setintmaxdigits__doc__, +"setintmaxdigits($module, 
/, maxdigits)\n" +"--\n" +"\n" +"TODO"); + +#define SYS_SETINTMAXDIGITS_METHODDEF \ + {"setintmaxdigits", _PyCFunction_CAST(sys_setintmaxdigits), METH_FASTCALL|METH_KEYWORDS, sys_setintmaxdigits__doc__}, + +static PyObject * +sys_setintmaxdigits_impl(PyObject *module, Py_ssize_t maxdigits); + +static PyObject * +sys_setintmaxdigits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"maxdigits", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "setintmaxdigits", 0}; + PyObject *argsbuf[1]; + Py_ssize_t maxdigits; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + maxdigits = ival; + } + return_value = sys_setintmaxdigits_impl(module, maxdigits); + +exit: + return return_value; +} + PyDoc_STRVAR(sys_getrefcount__doc__, "getrefcount($module, object, /)\n" "--\n" diff --git a/Python/initconfig.c b/Python/initconfig.c index 33a8f276b19cbf..c6992f1a947c32 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -124,7 +124,9 @@ The following implementation-specific options are available:\n\ do nothing if is not supported on the current system. The default value is \"off\".\n\ \n\ -X frozen_modules=[on|off]: whether or not frozen modules should be used.\n\ - The default is \"on\" (or \"off\" if you are running a local build)."; + The default is \"on\" (or \"off\" if you are running a local build).\n\ +\n\ +-X intmaxdigits=number: limit maximum digits ints."; /* Envvars that don't have equivalent command-line options are listed first */ static const char usage_envvars[] = @@ -144,6 +146,7 @@ static const char usage_envvars[] = " to seed the hashes of str and bytes objects. 
It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" +"PYTHONINTMAXDIGITS: limt maximum digits when converting from or to int\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" @@ -782,6 +785,8 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->safe_path = 0; config->_is_python_build = 0; config->code_debug_ranges = 1; + /* config_init_intmaxdigits() sets default limit */ + config->intmaxdigits = -1; } @@ -1008,6 +1013,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(safe_path); COPY_WSTRLIST(orig_argv); COPY_ATTR(_is_python_build); + COPY_ATTR(intmaxdigits); #undef COPY_ATTR #undef COPY_WSTR_ATTR @@ -1115,6 +1121,7 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_INT(use_frozen_modules); SET_ITEM_INT(safe_path); SET_ITEM_INT(_is_python_build); + SET_ITEM_INT(intmaxdigits); return dict; @@ -1762,6 +1769,42 @@ config_init_tracemalloc(PyConfig *config) return _PyStatus_OK(); } +static PyStatus +config_init_intmaxdigits(PyConfig *config) +{ + int maxdigits; + int valid = 0; + + /* set default limitation */ + config->intmaxdigits = _PY_LONG_DEFAULT_MAX_DIGITS; + + const char *env = config_get_env(config, "PYTHONINTMAXDIGITS"); + if (env) { + if (!_Py_str_to_int(env, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_TRESHOLD)); + } + if (!valid) { + return _PyStatus_ERR("PYTHONINTMAXDIGITS: invalid limit"); + } + config->intmaxdigits = maxdigits; + } + + const wchar_t *xoption = config_get_xoption(config, L"intmaxdigits"); + if (xoption) { + const wchar_t *sep = wcschr(xoption, L'='); + if (sep) { + if (!config_wstr_to_int(sep + 1, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_TRESHOLD)); + } + } + if (!valid) { + return _PyStatus_ERR("-X intmaxdigits: " + "invalid limit"); + } + 
config->intmaxdigits = maxdigits; + } + return _PyStatus_OK(); +} static PyStatus config_init_pycache_prefix(PyConfig *config) @@ -1818,6 +1861,7 @@ config_read_complex_options(PyConfig *config) return status; } } + if (config->perf_profiling < 0) { status = config_init_perf_profiling(config); if (_PyStatus_EXCEPTION(status)) { @@ -1825,6 +1869,13 @@ config_read_complex_options(PyConfig *config) } } + if (config->intmaxdigits < 0) { + status = config_init_intmaxdigits(config); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } + if (config->pycache_prefix == NULL) { status = config_init_pycache_prefix(config); if (_PyStatus_EXCEPTION(status)) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 75e64553d88c9f..a84da12b798e6a 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1671,6 +1671,45 @@ sys_mdebug_impl(PyObject *module, int flag) } #endif /* USE_MALLOPT */ + +/*[clinic input] +sys.getintmaxdigits + +TODO +[clinic start generated code]*/ + +static PyObject * +sys_getintmaxdigits_impl(PyObject *module) +/*[clinic end generated code: output=be8245491b631377 input=a5e40c7ebebc3bc2]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return PyLong_FromSsize_t(interp->intmaxdigits); +} + +/*[clinic input] +sys.setintmaxdigits + + maxdigits: Py_ssize_t + +TODO +[clinic start generated code]*/ + +static PyObject * +sys_setintmaxdigits_impl(PyObject *module, Py_ssize_t maxdigits) +/*[clinic end generated code: output=f08310ce0abd3fc7 input=6c9f05282da2c64e]*/ +{ + PyThreadState *tstate = _PyThreadState_GET(); + if ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_TRESHOLD)) { + tstate->interp->intmaxdigits = maxdigits; + Py_RETURN_NONE; + } else { + PyErr_Format( + PyExc_ValueError, "maxdigits must be 0 or larger than %zd", + _PY_LONG_MAX_DIGITS_TRESHOLD); + return NULL; + } +} + size_t _PySys_GetSizeOf(PyObject *o) { @@ -2186,6 +2225,8 @@ static PyMethodDef sys_methods[] = { SYS_DEACTIVATE_STACK_TRAMPOLINE_METHODDEF 
SYS_IS_STACK_TRAMPOLINE_ACTIVE_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF + SYS_GETINTMAXDIGITS_METHODDEF + SYS_SETINTMAXDIGITS_METHODDEF #ifdef Py_STATS SYS__STATS_ON_METHODDEF SYS__STATS_OFF_METHODDEF @@ -2686,6 +2727,7 @@ static PyStructSequence_Field flags_fields[] = { {"utf8_mode", "-X utf8"}, {"warn_default_encoding", "-X warn_default_encoding"}, {"safe_path", "-P"}, + {"intmaxdigits", "-X intmaxdigits"}, {0} }; @@ -2734,6 +2776,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags) SetFlag(preconfig->utf8_mode); SetFlag(config->warn_default_encoding); SetFlagObj(PyBool_FromLong(config->safe_path)); + SetFlagObj(PyLong_FromSsize_t(config->intmaxdigits)); #undef SetFlagObj #undef SetFlag return 0; diff --git a/Tools/scripts/intdoshttpserver.py b/Tools/scripts/intdoshttpserver.py new file mode 100644 index 00000000000000..710b0ec244d3c1 --- /dev/null +++ b/Tools/scripts/intdoshttpserver.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +import http.server + + +class IntDosRequestHandler(http.server.BaseHTTPRequestHandler): + content_length_digits = 5 + cookie_version_digits = 40_000 + + def do_GET(self): + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", "1" + ("0" * self.content_length_digits)) + self.send_header("Cookie", "version=1" + ("0" * self.cookie_version_digits)) + self.end_headers() + self.wfile.write(b"Really long content-length") + + +if __name__ == "__main__": + http.server.test(HandlerClass=IntDosRequestHandler) diff --git a/Tools/scripts/intdostimeit.py b/Tools/scripts/intdostimeit.py new file mode 100644 index 00000000000000..e8d33e6f1400fd --- /dev/null +++ b/Tools/scripts/intdostimeit.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +""" + +Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz + +100 digits +1000000 loops, best of 5: 371 nsec per loop +1000 digits +50000 loops, best of 5: 7.94 usec per loop +5000 digits +2000 loops, best of 5: 142 usec per loop +10000 digits +500 loops, best of 
5: 543 usec per loop +25000 digits +100 loops, best of 5: 3.31 msec per loop +50000 digits +20 loops, best of 5: 12.8 msec per loop +100000 digits +5 loops, best of 5: 52.4 msec per loop +250000 digits +1 loop, best of 5: 318 msec per loop +500000 digits +1 loop, best of 5: 1.27 sec per loop +1000000 digits +1 loop, best of 5: 5.2 sec per loop +""" +import timeit + + +for i in [ + 100, + 1_000, + 5_000, + 10_000, + 25_000, + 50_000, + 100_000, + 250_000, + 500_000, + 1_000_000, +]: + print(f"{i} digits") + timeit.main( + ["-s", f"s = '1' + ('0' * {i})", "int(s)",] + ) From 0a96b20a136f42f136ced5cbb7ab9aaf0cda55cb Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 19 Jan 2022 10:21:35 +0100 Subject: [PATCH 02/45] Default to disable, improve tests and docs --- Doc/c-api/init_config.rst | 9 +++++++++ Doc/library/sys.rst | 32 ++++++++++++++++++++---------- Doc/using/cmdline.rst | 9 ++++++++- Include/longobject.h | 9 +++++++-- Lib/test/support/__init__.py | 12 +++++++++++ Lib/test/test_cmd_line.py | 33 +++++++++++++++++++++++++++++++ Lib/test/test_embed.py | 1 + Lib/test/test_int.py | 32 +++++++++++------------------- Lib/test/test_json/test_decode.py | 10 ++++++---- Lib/test/test_sys.py | 8 ++++++-- Lib/test/test_xmlrpc.py | 6 ++++-- Objects/longobject.c | 5 ++++- Python/initconfig.c | 4 ++-- 13 files changed, 125 insertions(+), 45 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index c4a342ee811ca9..01d8dfefd45f65 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -828,6 +828,15 @@ PyConfig Default: ``0``. + .. c:member:: int intmaxdigits + + If greater than 0, enable int digit limitation. + + Configured by :option:`-X intmaxdigits <-X>` command line option or + :envvar:`PYTHONINTMAXDIGITS` env var. + + Default: ``-1``. + .. 
c:member:: int isolated If greater than ``0``, enable isolated mode: diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 0654ee1423bad4..33cee0d7d41206 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -521,6 +521,7 @@ always available. :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode <devmode>`) :const:`utf8_mode` :option:`-X utf8 <-X>` :const:`safe_path` :option:`-P` + :const:`intmaxdigits` :option:`-X intmaxdigits <-X>` (default: *-1*) ============================= ================================================================ .. versionchanged:: 3.2 @@ -543,6 +544,9 @@ always available. .. versionchanged:: 3.11 Added the ``safe_path`` attribute for :option:`-P` option. + .. versionchanged:: 3.12 + Added ``intmaxdigits`` attribute + .. data:: float_info @@ -1002,19 +1006,27 @@ always available. .. tabularcolumns:: |l|L| - +-------------------------+----------------------------------------------+ - | Attribute | Explanation | - +=========================+==============================================+ - | :const:`bits_per_digit` | number of bits held in each digit. Python | - | | integers are stored internally in base | - | | ``2**int_info.bits_per_digit`` | - +-------------------------+----------------------------------------------+ - | :const:`sizeof_digit` | size in bytes of the C type used to | - | | represent a digit | - +-------------------------+----------------------------------------------+ + +-------------------------------------+-----------------------------------------------+ + | Attribute | Explanation | + +=====================================+===============================================+ + | :const:`bits_per_digit` | number of bits held in each digit.
Python | + | | integers are stored internally in base | + | | ``2**int_info.bits_per_digit`` | + +-------------------------------------+-----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +-------------------------------------+-----------------------------------------------+ + | :const:`default_max_digits` | default value for :func:`sys.getintmaxdigits` | + +-------------------------------------+-----------------------------------------------+ + | :const:`max_digits_check_threshold` | minimum value for | + | | :func:`sys.setintmaxdigits` | + +-------------------------------------+-----------------------------------------------+ .. versionadded:: 3.1 + .. versionchanged:: 3.11 + Added ``default_max_digits`` and ``max_digits_check_threshold``. + .. data:: __interactivehook__ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index fa2b07e468b3b5..a73311fe29d515 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -505,11 +505,13 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. + * ``-X intmaxdigits`` to enable or disable int conversion limit. + See also :envvar:`PYTHONINTMAXDIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded application. Typical usage is ``python3 -X importtime -c 'import - asyncio'``. See also :envvar:`PYTHONPROFILEIMPORTTIME`. + asyncio'``. See also :envvar:`PYTHONPROFILEIMPORTTIME`. * ``-X dev``: enable :ref:`Python Development Mode <devmode>`, introducing additional runtime checks that are too expensive to be enabled by default. @@ -763,6 +765,11 @@ conflict. .. versionadded:: 3.2.3 +..
envvar:: PYTHONINTMAXDIGITS + + TODO + + .. versionadded:: 3.11 .. envvar:: PYTHONIOENCODING diff --git a/Include/longobject.h b/Include/longobject.h index 91d899eef8a4ab..8be8a14ef7a093 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -72,8 +72,13 @@ PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLongMask(PyObject *); PyAPI_FUNC(long long) PyLong_AsLongLongAndOverflow(PyObject *, int *); /* Default limitation */ -#define _PY_LONG_DEFAULT_MAX_DIGITS 5000 -/* Don't check unless input / output is larger than threshold */ +#ifndef _PY_LONG_DEFAULT_MAX_DIGITS +# define _PY_LONG_DEFAULT_MAX_DIGITS 0 +#endif +/* Threshold for max digits check. For performance reasons int() and + int.__str__ don't check values that are smaller than the + threshold. For common cases it avoids a lookup of the interpreter + state in a hot path */ #define _PY_LONG_MAX_DIGITS_TRESHOLD 1024 PyAPI_FUNC(PyObject *) PyLong_FromString(const char *, char **, int); diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 2409fb05d72891..241ca2f6404a44 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2341,3 +2341,15 @@ def sleeping_retry(timeout, err_msg=None, /, time.sleep(delay) delay = min(delay * 2, max_delay) + + +@contextlib.contextmanager +def setintmaxdigits(maxdigits): + """Set integer max digits limit + """ + current = sys.getintmaxdigits() + try: + sys.setintmaxdigits(maxdigits) + yield + finally: + sys.setintmaxdigits(current) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 64469763957a58..1c5ccf7db8c195 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -3,6 +3,7 @@ # See test_cmd_line_script.py for testing of script execution import os +import re import subprocess import sys import tempfile @@ -865,6 +866,38 @@ def test_parsing_error(self): self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) self.assertNotEqual(proc.returncode, 0) + def
test_intmaxdigits(self): + code = "import sys; print(sys.flags.intmaxdigits, sys.getintmaxdigits())" + + assert_python_failure('-X', 'intmaxdigits', '-c', code) + assert_python_failure('-X', 'intmaxdigits=foo', '-c', code) + assert_python_failure('-X', 'intmaxdigits=100', '-c', code) + + assert_python_failure('-c', code, PYTHONINTMAXDIGITS='foo') + assert_python_failure('-c', code, PYTHONINTMAXDIGITS='100') + + def res2int(res): + out = res.out.strip().decode("utf-8") + return tuple(int(i) for i in out.split()) + + res = assert_python_ok('-c', code) + self.assertEqual(res2int(res), (-1, sys.getintmaxdigits())) + res = assert_python_ok('-X', 'intmaxdigits=0', '-c', code) + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-X', 'intmaxdigits=4000', '-c', code) + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok('-X', 'intmaxdigits=100000', '-c', code) + self.assertEqual(res2int(res), (100000, 100000)) + + res = assert_python_ok('-c', code, PYTHONINTMAXDIGITS='0') + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-c', code, PYTHONINTMAXDIGITS='4000') + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok( + '-X', 'intmaxdigits=6000', '-c', code, PYTHONINTMAXDIGITS='4000' + ) + self.assertEqual(res2int(res), (6000, 6000)) + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 70d7367ea9e64f..ed13c2a50a7374 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -434,6 +434,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'install_signal_handlers': 1, 'use_hash_seed': 0, 'hash_seed': 0, + 'intmaxdigits': -1, 'faulthandler': 0, 'tracemalloc': 0, 'perf_profiling': 0, diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 63b98d6fcb065b..f404ed81cc242d 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,3 @@ -import 
contextlib import sys import unittest @@ -28,16 +27,6 @@ ] -@contextlib.contextmanager -def setintmaxdigits(maxdigits): - current = sys.getintmaxdigits() - try: - sys.setintmaxdigits(maxdigits) - yield - finally: - sys.setintmaxdigits(current) - - class IntSubclass(int): pass @@ -590,16 +579,17 @@ def test_issue31619(self): def _test_maxdigits(self, c): maxdigits = sys.getintmaxdigits() - # edge cases - c('1' * maxdigits) - c(' ' + '1' * maxdigits) - c('+' + '1' * maxdigits) - self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + if maxdigits != 0: + # edge cases + c('1' * maxdigits) + c(' ' + '1' * maxdigits) + c('+' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) # disable limitation - with setintmaxdigits(0): - c('1' * (maxdigits + 1)) - c('1' * (maxdigits + 1)) + with support.setintmaxdigits(0): + i = c('1' * 100_000) + str(i) # OverflowError def check(i, base=None): @@ -609,8 +599,8 @@ def check(i, base=None): else: c(i, base) - with setintmaxdigits(1024): - maxdigits = 1024 + maxdigits = 1024 + with support.setintmaxdigits(maxdigits): check('1' * (maxdigits + 1)) check('+' + '1' * (maxdigits + 1)) check('1' * (maxdigits + 1)) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 2742bce5f0324d..1fade1179b28dd 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -2,6 +2,7 @@ from io import StringIO from collections import OrderedDict from test.test_json import PyTest, CTest +from test import support import sys @@ -97,10 +98,11 @@ def test_negative_index(self): self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) def test_limit_int(self): - maxdigits = sys.getintmaxdigits() - self.loads('1' * maxdigits) - with self.assertRaises(OverflowError): - self.loads('1' * (maxdigits + 1)) + maxdigits = 5000 + with support.setintmaxdigits(maxdigits): + self.loads('1' * maxdigits) + with self.assertRaises(OverflowError): + self.loads('1' * (maxdigits + 
1)) class TestPyDecode(TestDecode, PyTest): pass diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 202fb30a8a7e9d..b73f697b1dae54 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -550,11 +550,15 @@ def test_attributes(self): self.assertIsInstance(sys.executable, str) self.assertEqual(len(sys.float_info), 11) self.assertEqual(sys.float_info.radix, 2) - self.assertEqual(len(sys.int_info), 2) + self.assertEqual(len(sys.int_info), 4) self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) self.assertTrue(sys.int_info.sizeof_digit >= 1) + self.assertGreaterEqual(sys.int_info.default_max_digits, 0) + self.assertGreaterEqual(sys.int_info.max_digits_check_threshold, 0) self.assertEqual(type(sys.int_info.bits_per_digit), int) self.assertEqual(type(sys.int_info.sizeof_digit), int) + self.assertIsInstance(sys.int_info.default_max_digits, int) + self.assertIsInstance(sys.int_info.max_digits_check_threshold, int) self.assertIsInstance(sys.hexversion, int) self.assertEqual(len(sys.hash_info), 9) @@ -677,7 +681,7 @@ def test_sys_flags(self): "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", "hash_randomization", "isolated", "dev_mode", "utf8_mode", - "warn_default_encoding", "safe_path") + "warn_default_encoding", "safe_path", "intmaxdigits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr in ("dev_mode", "safe_path") else int diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 6460799ecbf109..1f795862c97130 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -294,8 +294,10 @@ def test_limit_int(self): with self.assertRaises(OverflowError): check('123456780123456789', None) with self.assertRaises(OverflowError): - s = '1' * (sys.getintmaxdigits() + 1) - check(f'{s}', None) + maxdigits = 5000 + with support.setintmaxdigits(maxdigits): + s = '1' * (maxdigits + 1) + check(f'{s}', None) def test_get_host_info(self): # see 
bug #3613, this raised a TypeError diff --git a/Objects/longobject.c b/Objects/longobject.c index e60f9bb108ac15..50b6bfeb2ad370 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6114,7 +6114,7 @@ static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, {"default_max_digits", "maximum digits limitation"}, - {"max_digits_threshold", "minimum threshold to check for max digits"}, + {"max_digits_check_threshold", "minimum threshold to check for max digits"}, {NULL, NULL} }; @@ -6169,6 +6169,9 @@ _PyLong_InitTypes(PyInterpreterState *interp) } } interp->intmaxdigits = _PyInterpreterState_GetConfig(interp)->intmaxdigits; + if (interp->intmaxdigits == -1) { + interp->intmaxdigits = _PY_LONG_DEFAULT_MAX_DIGITS; + } return _PyStatus_OK(); } diff --git a/Python/initconfig.c b/Python/initconfig.c index c6992f1a947c32..5a0d500b80dc72 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -1775,8 +1775,8 @@ config_init_intmaxdigits(PyConfig *config) int maxdigits; int valid = 0; - /* set default limitation */ - config->intmaxdigits = _PY_LONG_DEFAULT_MAX_DIGITS; + /* default to unconfigured, _PyLong_InitTypes() does the rest */ + config->intmaxdigits = -1; const char *env = config_get_env(config, "PYTHONINTMAXDIGITS"); if (env) { From 88f6d5d9a8427c9b1e5593cbc3041b71a1896426 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 19 Jan 2022 10:25:54 +0100 Subject: [PATCH 03/45] fix typo --- Doc/library/sys.rst | 2 +- Include/longobject.h | 2 +- Objects/longobject.c | 6 +++--- Python/initconfig.c | 4 ++-- Python/sysmodule.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 33cee0d7d41206..8eede72b814bb2 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -521,7 +521,7 @@ always available. 
:const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) :const:`utf8_mode` :option:`-X utf8 <-X>` :const:`safe_path` :option:`-P` - :const:`intmaxdigits` `:option:``-X intmaxdigits <-X>` (default: *-1*) + :const:`intmaxdigits` :option:`-X intmaxdigits <-X>` (default: *-1*) ============================= ================================================================ .. versionchanged:: 3.2 diff --git a/Include/longobject.h b/Include/longobject.h index 8be8a14ef7a093..f3c6803eeb2bdb 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -79,7 +79,7 @@ PyAPI_FUNC(long long) PyLong_AsLongLongAndOverflow(PyObject *, int *); int.__str__ don't checks values that are smaller than the threshold. For common cases it avoids a lookup of the interpreter state in a hot path */ -#define _PY_LONG_MAX_DIGITS_TRESHOLD 1024 +#define _PY_LONG_MAX_DIGITS_THRESHOLD 1024 PyAPI_FUNC(PyObject *) PyLong_FromString(const char *, char **, int); diff --git a/Objects/longobject.c b/Objects/longobject.c index 50b6bfeb2ad370..9990cc9e0343a1 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1815,7 +1815,7 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } - if (strlen > _PY_LONG_MAX_DIGITS_TRESHOLD) { + if (strlen > _PY_LONG_MAX_DIGITS_THRESHOLD) { PyInterpreterState *interp = _PyInterpreterState_GET(); if ((interp->intmaxdigits > 0) && (strlen > interp->intmaxdigits)) { Py_DECREF(scratch); @@ -2491,7 +2491,7 @@ digit beyond the first. 
} slen = scan - str; - if (slen > _PY_LONG_MAX_DIGITS_TRESHOLD) { + if (slen > _PY_LONG_MAX_DIGITS_THRESHOLD) { PyInterpreterState *interp = _PyInterpreterState_GET(); if ((interp->intmaxdigits > 0 ) && (slen > interp->intmaxdigits)) { PyErr_SetString(PyExc_OverflowError, @@ -6140,7 +6140,7 @@ PyLong_GetInfo(void) PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(_PY_LONG_DEFAULT_MAX_DIGITS)); PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(_PY_LONG_MAX_DIGITS_TRESHOLD)); + PyLong_FromLong(_PY_LONG_MAX_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; diff --git a/Python/initconfig.c b/Python/initconfig.c index 5a0d500b80dc72..e3b9810cb1e902 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -1781,7 +1781,7 @@ config_init_intmaxdigits(PyConfig *config) const char *env = config_get_env(config, "PYTHONINTMAXDIGITS"); if (env) { if (!_Py_str_to_int(env, &maxdigits)) { - valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_TRESHOLD)); + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_THRESHOLD)); } if (!valid) { return _PyStatus_ERR("PYTHONINTMAXDIGITS: invalid limit"); @@ -1794,7 +1794,7 @@ config_init_intmaxdigits(PyConfig *config) const wchar_t *sep = wcschr(xoption, L'='); if (sep) { if (!config_wstr_to_int(sep + 1, &maxdigits)) { - valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_TRESHOLD)); + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_THRESHOLD)); } } if (!valid) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index a84da12b798e6a..bf5761d2a66baa 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1699,13 +1699,13 @@ sys_setintmaxdigits_impl(PyObject *module, Py_ssize_t maxdigits) /*[clinic end generated code: output=f08310ce0abd3fc7 input=6c9f05282da2c64e]*/ { PyThreadState *tstate = _PyThreadState_GET(); - if ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_TRESHOLD)) { + if ((maxdigits == 0) || (maxdigits >= 
_PY_LONG_MAX_DIGITS_THRESHOLD)) { tstate->interp->intmaxdigits = maxdigits; Py_RETURN_NONE; } else { PyErr_Format( PyExc_ValueError, "maxdigits must be 0 or larger than %zd", - _PY_LONG_MAX_DIGITS_TRESHOLD); + _PY_LONG_MAX_DIGITS_THRESHOLD); return NULL; } } From 70c195ea1841cf6fbf069699248138f81ab2ced4 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 19 Jan 2022 13:43:02 +0100 Subject: [PATCH 04/45] More docs (WIP) --- Doc/library/functions.rst | 3 +- Doc/library/stdtypes.rst | 66 +++++++++++++++++++++++++++++++++++++++ Doc/library/sys.rst | 8 +++-- Doc/using/cmdline.rst | 7 +++-- 4 files changed, 77 insertions(+), 7 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 6c335d64971012..deb12c9fee7aae 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -911,7 +911,8 @@ are always available. They are listed here in alphabetical order. The delegation to :meth:`__trunc__` is deprecated. .. versionchanged:: 3.12 - :class:`int` are now limited, :func:`sys.setintmaxdigits` TODO + :class:`int` string inputs are now limited, see :ref:`int maximum + digits limitation `. .. function:: isinstance(object, classinfo) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index f68cf46a6c619c..5f9219b8b9eec1 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5460,6 +5460,72 @@ types, where they are relevant. Some of these are not reported by the [] +.. _intmaxdigits: + +Integer maximum digits limitation +================================= + +CPython has a global limit for converting between :class:`int` and class:`str` +to mitigate denial of service attacks. The limit is necessary because there +exists no efficient algorithm, that can convert a string to an integer or +an integer to a string in linear time, unless the base is a power of *2*. Even +the best known algorithms for base *10* have sub-quadratic complexity. 
A large +input like:: + + int('1' * 500_000) + +takes about a second at 100% CPU load on an X86_64 CPU from 2020 with 4.2 GHz +max frequency. + +Configure limitations +--------------------- + +* :data:`sys.int_info.default_max_digits` is the compiled-in default value. +* :data:`sys.int_info.max_digits_check_threshold` is the minimum limit for + digit limitation. For performance reasons Python does not check + +* :envvar:`PYTHONINTMAXDIGITS`, e.g. ``PYTHONINTMAXDIGITS=4096 python3`` to + set the limit to ``4096`` or ``PYTHONINTMAXDIGITS=0 python3`` to disable + the limitation +* :option:`-X intmaxdigits <-X>`, e.g. ``python3 -X intmaxdigits=4096`` +* :data:`sys.flags.intmaxdigits` contains the value of + :envvar:`PYTHONINTMAXDIGITS` or :option:`-X intmaxdigits <-X>`. In case + both the env var and the ``-X`` option are set, the ``-X`` option takes + precedence. The flag defaults to *-1*. + +* :func:`sys.getintmaxdigits` and :func:`sys.setintmaxdigits` are getter + and setter for interpreter-wide limit. + +Recommended configuration:: + + import sys + if hasattr(sys.flags, "intmaxdigits") and sys.flags.intmaxdigits == -1: + sys.setintmaxdigits(4096) + +Affected APIs +------------- + +The limition only applies to slow conversions between :class:`int` and +class:`str`: + +* ``int(string)`` with default base 10. +* ``int(string, base)`` for all bases that are not power of 2. +* ``str(integer)`` +* ``repr(integer)`` +* any other string conversion to base 10, for example ``f"{integer}"``, + ``"{}".format(integer)``, or ``"%d" % integer``. + +The limitations do not apply to functions with a linear algorithm: + +* ``int(string, base)`` with base 2, 4, 8, 16, or 32 +* :func:`int.from_bytes` and :func:`int.to_bytes` +* :func:`hex`, :func:`oct`, :func:`bin` (the resulting string may consume + a substantial amount of memory) +* :ref:`formatspec` for hex, octet, and binary types +* :class:`str` to :class:`float` +* :class:`str` to :class:`decimal.Decimal` + + .. 
rubric:: Footnotes .. [1] Additional information on these special methods may be found in the Python diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 8eede72b814bb2..06aeecaaba8f88 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -729,9 +729,10 @@ always available. .. function:: getintmaxdigits() - Return limit for int digits, :func:`setintmaxdigits` TODO + Return current global value for :ref:`int maximum digits limitation + `. See also :func:`setintmaxdigits` - .. versionadded:: 3.9 + .. versionadded:: 3.11 .. function:: getrefcount(object) @@ -1328,7 +1329,8 @@ always available. .. function:: setintmaxdigits(n) - Set maximum amount of int digits, :func:`getintmaxdigits` TODO + Set global interpreter limit for :ref:`int maximum digits limitation + `. See also :func:`getintmaxdigits` .. versionadded:: 3.9 diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index a73311fe29d515..6888efcae45fd9 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -505,8 +505,8 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. - * ``-X intmaxdigits`` to enable or disable int conversion limit. - See also :envvar:`PYTHONPROFILEIMPORTTIME`. + * ``-X intmaxdigits`` configures :ref:`int maximum digits limitation + `. See also :envvar:`PYTHONPROFILEIMPORTTIME`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded @@ -767,7 +767,8 @@ conflict. .. envvar:: PYTHONINTMAXDIGITS - TODO + If this variable is set to an integer, it is used to configure the interpreter's + global :ref:`int maximum digits limitation `. .. 
versionadded:: 3.11 From e17e93bd36d5583e82095a86c6b4479fbba2f40e Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 19 Jan 2022 18:51:46 +0100 Subject: [PATCH 05/45] Basic documentation for sys functions --- Python/sysmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index bf5761d2a66baa..ef251aa5566dfb 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1675,7 +1675,7 @@ sys_mdebug_impl(PyObject *module, int flag) /*[clinic input] sys.getintmaxdigits -TODO +Get value of integer maximum digits limit. [clinic start generated code]*/ static PyObject * @@ -1691,7 +1691,7 @@ sys.setintmaxdigits maxdigits: Py_ssize_t -TODO +Set value of integer maximum digits limit. [clinic start generated code]*/ static PyObject * From fbd14b753d2a27d51a89322c4f2aaf5ad24a0102 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 20 Jan 2022 15:14:50 +0100 Subject: [PATCH 06/45] Use ValueError, ignore underscore, scale limit --- Doc/library/functions.rst | 6 ++-- Doc/library/stdtypes.rst | 47 +++++++++++++++++++++---------- Lib/test/test_int.py | 24 +++++++++++++--- Lib/test/test_json/test_decode.py | 2 +- Lib/test/test_xmlrpc.py | 2 +- Objects/longobject.c | 24 ++++++++++++---- 6 files changed, 77 insertions(+), 28 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index deb12c9fee7aae..edcf80ba3a9eb8 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -911,8 +911,10 @@ are always available. They are listed here in alphabetical order. The delegation to :meth:`__trunc__` is deprecated. .. versionchanged:: 3.12 - :class:`int` string inputs are now limited, see :ref:`int maximum - digits limitation `. + :class:`int` string inputs and string representation can be limited. + A :exc:`ValueError` is raised when the input or string representation + exceeds the limit. See :ref:`int maximum + digits limitation ` for more information. .. 
function:: isinstance(object, classinfo) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 5f9219b8b9eec1..b3cf1361120fa7 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5466,16 +5466,31 @@ Integer maximum digits limitation ================================= CPython has a global limit for converting between :class:`int` and class:`str` -to mitigate denial of service attacks. The limit is necessary because there -exists no efficient algorithm, that can convert a string to an integer or +to mitigate denial of service attacks. The limit is necessary because Python's +integer type is an abitrary length number (also known as bignum). There +exists no efficient algorithm that can convert a string to an integer or an integer to a string in linear time, unless the base is a power of *2*. Even the best known algorithms for base *10* have sub-quadratic complexity. A large -input like:: - - int('1' * 500_000) +input like ``int('1' * 500_000)`` takes about a second at 100% CPU load on +an X86_64 CPU from 2020 with 4.2 GHz max frequency. + +The limit value uses base 10 as a reference point and scales with base. +That means :class:`int` accepts longer input strings for smaller bases and +fails earlier for larger bases. Underscores in input strings don't count +towards the limit. + +When an operation exceeds the limit, an :exc:`ValueError` is raised:: + + >>> sys.setintmaxdigits(2048) + >>> i = 10 ** 2047 + >>> len(str(i)) + 2048 + >>> i = 10 ** 2048 + >>> len(str(i)) + Traceback (most recent call last): + ... + ValueError: input exceeds maximum integer digit limit -takes about a second at 100% CPU load on an X86_64 CPU from 2020 with 4.2 GHz -max frequency. Configure limitations --------------------- @@ -5494,13 +5509,8 @@ Configure limitations precedence. The flag defaults to *-1*. * :func:`sys.getintmaxdigits` and :func:`sys.setintmaxdigits` are getter - and setter for interpreter-wide limit. 
- -Recommended configuration:: - - import sys - if hasattr(sys.flags, "intmaxdigits") and sys.flags.intmaxdigits == -1: - sys.setintmaxdigits(4096) + and setter for interpreter-wide limit. Subinterpreters have their own + limit. Affected APIs ------------- @@ -5521,10 +5531,17 @@ The limitations do not apply to functions with a linear algorithm: * :func:`int.from_bytes` and :func:`int.to_bytes` * :func:`hex`, :func:`oct`, :func:`bin` (the resulting string may consume a substantial amount of memory) -* :ref:`formatspec` for hex, octet, and binary types +* :ref:`formatspec` for hex, octet, and binary numbers * :class:`str` to :class:`float` * :class:`str` to :class:`decimal.Decimal` +Recommended configuration +------------------------- + + import sys + if hasattr(sys.flags, "intmaxdigits") and sys.flags.intmaxdigits == -1: + sys.setintmaxdigits(4096) + .. rubric:: Footnotes diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index f404ed81cc242d..8f387a6ae84b6e 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -591,24 +591,40 @@ def _test_maxdigits(self, c): i = c('1' * 100_000) str(i) - # OverflowError def check(i, base=None): - with self.assertRaises(OverflowError): + with self.assertRaises(ValueError): if base is None: c(i) else: c(i, base) - maxdigits = 1024 + maxdigits = 2048 with support.setintmaxdigits(maxdigits): + assert maxdigits == sys.getintmaxdigits() check('1' * (maxdigits + 1)) check('+' + '1' * (maxdigits + 1)) check('1' * (maxdigits + 1)) i = 10 ** maxdigits - with self.assertRaises(OverflowError): + with self.assertRaises(ValueError): str(i) + # ignore power of two + for base in (2, 4, 8, 16, 32): + c('1' * (maxdigits + 1), base) + c('1' * 100_000, base) + + # limit ignores underscores + s = '1111_' * ((maxdigits) // 4) + s = s[:-1] + int(s) + check(s + '1') + + # limit is in equivalent of base 10 digits + s = '1' * 2147 + assert len(str(int(s, 9))) == maxdigits + int(s + '1', 9) + def test_maxdigits(self): 
self._test_maxdigits(int) self._test_maxdigits(IntSubclass) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 1fade1179b28dd..861ee88e76febd 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -101,7 +101,7 @@ def test_limit_int(self): maxdigits = 5000 with support.setintmaxdigits(maxdigits): self.loads('1' * maxdigits) - with self.assertRaises(OverflowError): + with self.assertRaises(ValueError): self.loads('1' * (maxdigits + 1)) diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 1f795862c97130..eba55c8b725946 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -293,7 +293,7 @@ def test_limit_int(self): check = self.check_loads with self.assertRaises(OverflowError): check('123456780123456789', None) - with self.assertRaises(OverflowError): + with self.assertRaises(ValueError): maxdigits = 5000 with support.setintmaxdigits(maxdigits): s = '1' * (maxdigits + 1) diff --git a/Objects/longobject.c b/Objects/longobject.c index 9990cc9e0343a1..328c7e5aa7a3ed 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1819,8 +1819,8 @@ long_to_decimal_string_internal(PyObject *aa, PyInterpreterState *interp = _PyInterpreterState_GET(); if ((interp->intmaxdigits > 0) && (strlen > interp->intmaxdigits)) { Py_DECREF(scratch); - PyErr_SetString(PyExc_OverflowError, - "too many digits in integer"); + PyErr_SetString(PyExc_ValueError, + "input exceeds maximum integer digit limit"); return -1; } } @@ -2434,6 +2434,7 @@ digit beyond the first. twodigits c; /* current input character */ Py_ssize_t size_z; Py_ssize_t digits = 0; + Py_ssize_t underscores = 0; int i; int convwidth; twodigits convmultmax, convmult; @@ -2470,6 +2471,7 @@ digit beyond the first. while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base || *scan == '_') { if (*scan == '_') { + ++underscores; if (prev == '_') { /* Only one underscore allowed. 
*/ str = lastdigit + 1; @@ -2490,12 +2492,24 @@ digit beyond the first. goto onError; } - slen = scan - str; + /* intmaxdigits limit ignores underscores and uses base 10 + * as reference point. + * For other bases slen is transformed into base 10 equivalents. + * Our string to integer conversion algorithm scales less than + * linear with base value, for example int('1' * 300_000", 30) + * is slightly more than five times slower than int(..., 5). + * The naive scaling "slen / 10 * base" is close enough to + * compensate. + */ + slen = scan - str - underscores; + if (base != 10) { + slen = (Py_ssize_t)(slen / 10 * base); + } if (slen > _PY_LONG_MAX_DIGITS_THRESHOLD) { PyInterpreterState *interp = _PyInterpreterState_GET(); if ((interp->intmaxdigits > 0 ) && (slen > interp->intmaxdigits)) { - PyErr_SetString(PyExc_OverflowError, - "too many digits in integer"); + PyErr_SetString(PyExc_ValueError, + "input exceeds maximum integer digit limit"); return NULL; } } From dd74d708db364242291a262ff57730ce8b2dd2de Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 20 Jan 2022 16:55:18 +0100 Subject: [PATCH 07/45] Fix CI --- Doc/library/stdtypes.rst | 4 +++- Python/clinic/sysmodule.c.h | 6 +++--- Python/sysmodule.c | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b3cf1361120fa7..7a0399997e1ea4 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5477,7 +5477,7 @@ an X86_64 CPU from 2020 with 4.2 GHz max frequency. The limit value uses base 10 as a reference point and scales with base. That means :class:`int` accepts longer input strings for smaller bases and fails earlier for larger bases. Underscores in input strings don't count -towards the limit. +towards the limit. 
When an operation exceeds the limit, an :exc:`ValueError` is raised:: @@ -5538,6 +5538,8 @@ The limitations do not apply to functions with a linear algorithm: Recommended configuration ------------------------- +Example:: + import sys if hasattr(sys.flags, "intmaxdigits") and sys.flags.intmaxdigits == -1: sys.setintmaxdigits(4096) diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 9983c9539110fe..4e20f703721219 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -749,7 +749,7 @@ PyDoc_STRVAR(sys_getintmaxdigits__doc__, "getintmaxdigits($module, /)\n" "--\n" "\n" -"TODO"); +"Get value of integer maximum digits limit."); #define SYS_GETINTMAXDIGITS_METHODDEF \ {"getintmaxdigits", (PyCFunction)sys_getintmaxdigits, METH_NOARGS, sys_getintmaxdigits__doc__}, @@ -767,7 +767,7 @@ PyDoc_STRVAR(sys_setintmaxdigits__doc__, "setintmaxdigits($module, /, maxdigits)\n" "--\n" "\n" -"TODO"); +"Set value of integer maximum digits limit."); #define SYS_SETINTMAXDIGITS_METHODDEF \ {"setintmaxdigits", _PyCFunction_CAST(sys_setintmaxdigits), METH_FASTCALL|METH_KEYWORDS, sys_setintmaxdigits__doc__}, @@ -1328,4 +1328,4 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=43b44240211afe95 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6077bf7ef08d3318 input=a9049054013a1b77]*/ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index ef251aa5566dfb..dcfa7185a517e1 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1680,7 +1680,7 @@ Get value of integer maximum digits limit. 
static PyObject * sys_getintmaxdigits_impl(PyObject *module) -/*[clinic end generated code: output=be8245491b631377 input=a5e40c7ebebc3bc2]*/ +/*[clinic end generated code: output=be8245491b631377 input=4c6cf29e9858e10e]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); return PyLong_FromSsize_t(interp->intmaxdigits); @@ -1696,7 +1696,7 @@ Set value of integer maximum digits limit. static PyObject * sys_setintmaxdigits_impl(PyObject *module, Py_ssize_t maxdigits) -/*[clinic end generated code: output=f08310ce0abd3fc7 input=6c9f05282da2c64e]*/ +/*[clinic end generated code: output=f08310ce0abd3fc7 input=66814100429a2b99]*/ { PyThreadState *tstate = _PyThreadState_GET(); if ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_THRESHOLD)) { From 0e01461a941dbc64cb7b89034b2b79da166123c7 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 1 Aug 2022 20:32:26 +0200 Subject: [PATCH 08/45] Address Greg's review --- Doc/library/sys.rst | 4 ++-- Objects/longobject.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 06aeecaaba8f88..ec3b1355b1df2f 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -544,7 +544,7 @@ always available. .. versionchanged:: 3.11 Added the ``safe_path`` attribute for :option:`-P` option. - .. versionchanged:: 3.12 + .. versionchanged:: 3.11 Added ``intmaxdigits`` attribute @@ -1332,7 +1332,7 @@ always available. Set global interpreter limit for :ref:`int maximum digits limitation `. See also :func:`getintmaxdigits` - .. versionadded:: 3.9 + .. versionadded:: 3.11 .. 
function:: setprofile(profilefunc) diff --git a/Objects/longobject.c b/Objects/longobject.c index 328c7e5aa7a3ed..f48c0f7a350338 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6153,7 +6153,7 @@ PyLong_GetInfo(void) PyLong_FromLong(sizeof(digit))); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(_PY_LONG_DEFAULT_MAX_DIGITS)); - PyStructSequence_SET_ITEM(int_info, field++, + PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(_PY_LONG_MAX_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); From 0b21e5f1dccf0c6ff1680658aa1c18c2d98d4ac8 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 1 Aug 2022 23:27:34 +0200 Subject: [PATCH 09/45] Fix sys.flags len and docs --- Doc/library/stdtypes.rst | 4 ++-- Python/sysmodule.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 7a0399997e1ea4..6602d55b903508 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5465,7 +5465,7 @@ types, where they are relevant. Some of these are not reported by the Integer maximum digits limitation ================================= -CPython has a global limit for converting between :class:`int` and class:`str` +CPython has a global limit for converting between :class:`int` and :class:`str` to mitigate denial of service attacks. The limit is necessary because Python's integer type is an abitrary length number (also known as bignum). There exists no efficient algorithm that can convert a string to an integer or @@ -5516,7 +5516,7 @@ Affected APIs ------------- The limition only applies to slow conversions between :class:`int` and -class:`str`: +:class:`str`: * ``int(string)`` with default base 10. * ``int(string, base)`` for all bases that are not power of 2. 
diff --git a/Python/sysmodule.c b/Python/sysmodule.c index dcfa7185a517e1..fd349d8ebd152a 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2735,7 +2735,7 @@ static PyStructSequence_Desc flags_desc = { "sys.flags", /* name */ flags__doc__, /* doc */ flags_fields, /* fields */ - 17 + 18 }; static int From 3b38abe7da4fc2887028bdf3e31f6b3d6d46c145 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 2 Aug 2022 12:13:46 -0700 Subject: [PATCH 10/45] Keep the warning, but remove advice about limiting input length in the json module. "It's advised to limit the input to a sensible length." isn't very helpful for the JSON module as, while technically true, the limit needed to avoid hitting things like the int<->str base 10 conversion this issue is about would make a significant percentage of actual JSON used in the real world impractical. we're limiting the int/str conversion length, that is the best that can be done here - unless someone wants to implement a feature request for JSON to reject int looking fields at a much smaller base10 length limit. --- Doc/library/json.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index d0228c53ecaf69..93bfca1dd227a4 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -26,7 +26,7 @@ is a lightweight data interchange format inspired by .. warning:: Be cautious when parsing JSON data from untrusted sources. A malicious JSON string may cause the decoder to consume considerable CPU and memory - resources. It's advised to limit the input to a sensible length. + resources. :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. From 37193ed0bbcf0adc7611ae464e0d2a7fa83219a5 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 4 Aug 2022 17:49:26 -0700 Subject: [PATCH 11/45] Renamed the APIs & too many other refactorings. 
This lumps way too many changes together in one commit, but extricating them into a series of individual pieces doesn't seem worthwhile at this stage, it could be done by picking and choosing into a new branch if there is a reason to do so. Summarizing off the top of my head after rereading diffs too many times: - Renamed the environment variable, -X flag, and names of the new APIs. - Cleaned up the documentation. - Refactored and improved some tests. - Moved the definitions of the default and threshold to a non-public header. - Set the default to 2000 instead of disabled as the Steering Council agreed having a default limit made the most sense for our users. - Lowered the minimum threshold because there seems no harm in allowing people to have a lower limit. - Fixed the other-bases base-10 digits equivalent estimate to be consistent with the actual potential base-10 value instead of wildly off. Aimed for consistency of number size here rather than any attempt to match the performance between bases as that is easier for users to understand and doesn't change if our algorithm implementations change. - Left a couple of notes about what changes look good or not good for a security fix backported into a stable release. It appears the PyConfig struct is a public API so we cannot change its definition mid-release. Backporting is going to involve something outside of the normal code path wise to plumb the new value through. Ugh. Questions: * [ ] `test_embed` is failing in an odd manner... what's up with that? * [ ] Is `_pydecimal` important? This change makes some of its APIs not work based on the values being used. The unittest suite that tests both the C and Python Decimal implementations revealed this and needed a higher limit set. If it relies on ints for arbitrary precision Decimal numbers, limits are frequently going to make it practically unusable. * [ ] Backporting...
we should produce what we believe a 3.11/3.10/3.9 backport will look like in its own PR branch before making a final decision. The most annoying difficulty being the `struct PyConfig` issue. * [ ] I pondered having values >0 but ` command line option or - :envvar:`PYTHONINTMAXDIGITS` env var. + Configured by the :option:`-X int_max_base10_digits <-X>` command line + flag or the :envvar:`PYTHONINTMAXBASE10DIGITS` environment varable. Default: ``-1``. diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index edcf80ba3a9eb8..7d06b321b8a555 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -910,11 +910,11 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.11 The delegation to :meth:`__trunc__` is deprecated. - .. versionchanged:: 3.12 - :class:`int` string inputs and string representation can be limited. - A :exc:`ValueError` is raised when the input or string representation - exceeds the limit. See :ref:`int maximum - digits limitation ` for more information. + .. versionchanged:: 3.11 + :class:`int` string inputs and string representation can be limited + to help avoid denial of service attacks. A :exc:`ValueError` is raised + when an input or string representation exceeds the limit. See :ref:`int + maximum digits limitation ` for more information. .. function:: isinstance(object, classinfo) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 93bfca1dd227a4..3102cf6a0561a5 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -26,7 +26,7 @@ is a lightweight data interchange format inspired by .. warning:: Be cautious when parsing JSON data from untrusted sources. A malicious JSON string may cause the decoder to consume considerable CPU and memory - resources. + resources. Limiting the size of data to be parsed is recommended. :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. 
@@ -258,9 +258,11 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - .. versionchanged:: 3.9 - The default implementation of *parse_int* limits the input string to - 5,000 digits to prevent denial of service attacks. + .. versionchanged:: 3.11 + The default implementation of *parse_int* limits the maximum length of + the integer string via the interpreter's :ref:`int maximum digits + limitation ` mechanism to help avoid denial of + service attacks. *parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 6602d55b903508..80e50f18ba2291 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5460,28 +5460,28 @@ types, where they are relevant. Some of these are not reported by the [] -.. _intmaxdigits: +.. _int_max_base10_digits: Integer maximum digits limitation ================================= CPython has a global limit for converting between :class:`int` and :class:`str` -to mitigate denial of service attacks. The limit is necessary because Python's -integer type is an abitrary length number (also known as bignum). There -exists no efficient algorithm that can convert a string to an integer or -an integer to a string in linear time, unless the base is a power of *2*. Even -the best known algorithms for base *10* have sub-quadratic complexity. A large -input like ``int('1' * 500_000)`` takes about a second at 100% CPU load on -an X86_64 CPU from 2020 with 4.2 GHz max frequency. - -The limit value uses base 10 as a reference point and scales with base. -That means :class:`int` accepts longer input strings for smaller bases and -fails earlier for larger bases. Underscores in input strings don't count -towards the limit. - -When an operation exceeds the limit, an :exc:`ValueError` is raised:: - - >>> sys.setintmaxdigits(2048) +to mitigate denial of service attacks. 
The limit is necessary because CPython's +integer type is an arbitrary length number (commonly known as a bignum) stored +in binary form. There exists no algorithm that can convert a string to a binary +integer or a binary integer to a string in linear time, unless the base is a +power of *2*. Even the best known algorithms for base *10* have sub-quadratic +complexity. Converting a large value such as ``int('1' * 500_000)`` can take +over a second on a fast CPU. + +The limit value uses base 10 as a reference point and scales with base. That +means an :class:`int` conversion accepts longer strings for smaller bases and +shorter strings for larger bases. Underscores and the sign in strings don't +count towards the limit. + +When an operation exceeds the limit, a :exc:`ValueError` is raised:: + + >>> sys.set_int_max_base10_digits(2048) >>> i = 10 ** 2047 >>> len(str(i)) 2048 @@ -5489,60 +5489,69 @@ When an operation exceeds the limit, an :exc:`ValueError` is raised:: >>> len(str(i)) Traceback (most recent call last): ... - ValueError: input exceeds maximum integer digit limit + ValueError: exceeds maximum integer base 10 digit limit - -Configure limitations +Configuring the limit --------------------- -* :data:`sys.int_info.default_max_digits` is the compiled-in default value. -* :data:`sys.int_info.max_digits_check_threshold` is the minimum limit for - digit limitation. For performance reasons Python does not check - -* :envvar:`PYTHONINTMAXDIGITS`, e.g. ``PYTHONINTMAXDIGITS=4096 python3`` to - set the limit to ``4096`` or ``PYTHONINTMAXDIGITS=0 python3`` to disable - the limitation -* :option:`-X intmaxdigits <-X>`, e.g. ``python3 -X intmaxdigits=4096`` -* :data:`sys.flags.intmaxdigits` contains the value of - :envvar:`PYTHONINTMAXDIGITS` or :option:`-X intmaxdigits <-X>`. In case - both the env var and the ``-X`` option are set, the ``-X`` option takes - precedence. The flag defaults to *-1*.
- -* :func:`sys.getintmaxdigits` and :func:`sys.setintmaxdigits` are getter - and setter for interpreter-wide limit. Subinterpreters have their own +* :data:`sys.int_info.default_max_base10_digits` is the compiled-in default + limit. +* :data:`sys.int_info.base10_digits_check_threshold` is the minimum accepted + value for the limit. + +* :envvar:`PYTHONINTMAXBASE10DIGITS`, e.g. + ``PYTHONINTMAXBASE10DIGITS=4321 python3`` to set the limit to ``4321`` or + ``PYTHONINTMAXBASE10DIGITS=0 python3`` to disable the limitation. +* :option:`-X int_max_base10_digits <-X>`, e.g. + ``python3 -X int_max_base10_digits=4321`` +* :data:`sys.flags.int_max_base10_digits` contains the value of + :envvar:`PYTHONINTMAXBASE10DIGITS` or + :option:`-X int_max_base10_digits <-X>`. In case both the env var and the + ``-X`` option are set, the ``-X`` option takes precedence. The value of + *-1* indicates that both were unset and the value of + :data:`sys.int_info.default_max_base10_digits` will be used. + +* :func:`sys.get_int_max_base10_digits` and + :func:`sys.set_int_max_base10_digits` are a getter and setter for + the interpreter-wide limit. Subinterpreters have their own limit. Affected APIs ------------- -The limition only applies to slow conversions between :class:`int` and -:class:`str`: +The limition only applies to potentially slow conversions between :class:`int` +and :class:`str`: * ``int(string)`` with default base 10. -* ``int(string, base)`` for all bases that are not power of 2. -* ``str(integer)`` +* ``int(string, base)`` for all bases that are not a power of 2. +* ``str(integer)``. * ``repr(integer)`` * any other string conversion to base 10, for example ``f"{integer}"``, ``"{}".format(integer)``, or ``"%d" % integer``. 
The limitations do not apply to functions with a linear algorithm: -* ``int(string, base)`` with base 2, 4, 8, 16, or 32 -* :func:`int.from_bytes` and :func:`int.to_bytes` -* :func:`hex`, :func:`oct`, :func:`bin` (the resulting string may consume - a substantial amount of memory) -* :ref:`formatspec` for hex, octet, and binary numbers -* :class:`str` to :class:`float` -* :class:`str` to :class:`decimal.Decimal` +* ``int(string, base)`` with base 2, 4, 8, 16, or 32. +* :func:`int.from_bytes` and :func:`int.to_bytes`. +* :func:`hex`, :func:`oct`, :func:`bin`. +* :ref:`formatspec` for hex, octal, and binary numbers. +* :class:`str` to :class:`float`. +* :class:`str` to :class:`decimal.Decimal`. Recommended configuration ------------------------- +The default :data:`sys.int_info.default_max_base10_digits` is expected to be +reasonable for most applications. If your application requires a different +limit, use Python version and implementation agnostic code to set it. + Example:: - import sys - if hasattr(sys.flags, "intmaxdigits") and sys.flags.intmaxdigits == -1: - sys.setintmaxdigits(4096) + >>> import sys + >>> if hasattr(sys, "set_int_max_base10_digits"): + ... current_limit = sys.get_int_max_base10_digits() + ... if not current_limit or current_limit > 4321: + ... sys.set_int_max_base10_digits(4321) .. rubric:: Footnotes diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index ec3b1355b1df2f..425b2cd19cc54a 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -502,27 +502,27 @@ always available. The :term:`named tuple` *flags* exposes the status of command line flags. The attributes are read only. 
- ============================= ================================================================ - attribute flag - ============================= ================================================================ - :const:`debug` :option:`-d` - :const:`inspect` :option:`-i` - :const:`interactive` :option:`-i` - :const:`isolated` :option:`-I` - :const:`optimize` :option:`-O` or :option:`-OO` - :const:`dont_write_bytecode` :option:`-B` - :const:`no_user_site` :option:`-s` - :const:`no_site` :option:`-S` - :const:`ignore_environment` :option:`-E` - :const:`verbose` :option:`-v` - :const:`bytes_warning` :option:`-b` - :const:`quiet` :option:`-q` - :const:`hash_randomization` :option:`-R` - :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) - :const:`utf8_mode` :option:`-X utf8 <-X>` - :const:`safe_path` :option:`-P` - :const:`intmaxdigits` :option:`-X intmaxdigits <-X>` (default: *-1*) - ============================= ================================================================ + ================================== ====================================================================================================== + attribute flag + ================================== ====================================================================================================== + :const:`debug` :option:`-d` + :const:`inspect` :option:`-i` + :const:`interactive` :option:`-i` + :const:`isolated` :option:`-I` + :const:`optimize` :option:`-O` or :option:`-OO` + :const:`dont_write_bytecode` :option:`-B` + :const:`no_user_site` :option:`-s` + :const:`no_site` :option:`-S` + :const:`ignore_environment` :option:`-E` + :const:`verbose` :option:`-v` + :const:`bytes_warning` :option:`-b` + :const:`quiet` :option:`-q` + :const:`hash_randomization` :option:`-R` + :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) + :const:`utf8_mode` :option:`-X utf8 <-X>` + :const:`safe_path` :option:`-P` + :const:`int_max_base10_digits` :option:`-X 
int_max_base10_digits <-X>` (:ref:`int maximum digits limitation `) + ================================== ====================================================================================================== .. versionchanged:: 3.2 Added ``quiet`` attribute for the new :option:`-q` flag. @@ -545,7 +545,7 @@ always available. Added the ``safe_path`` attribute for :option:`-P` option. .. versionchanged:: 3.11 - Added ``intmaxdigits`` attribute + Added the ``int_max_base10_digits`` attribute. .. data:: float_info @@ -727,10 +727,10 @@ always available. .. versionadded:: 3.6 -.. function:: getintmaxdigits() +.. function:: get_int_max_base10_digits() Return current global value for :ref:`int maximum digits limitation - `. See also :func:`setintmaxdigits` + `. See also :func:`set_int_max_base10_digits` .. versionadded:: 3.11 @@ -1007,26 +1007,32 @@ always available. .. tabularcolumns:: |l|L| - +-------------------------------------+-----------------------------------------------+ - | Attribute | Explanation | - +=====================================+===============================================+ - | :const:`bits_per_digit` | number of bits held in each digit. 
Python | - | | integers are stored internally in base | - | | ``2**int_info.bits_per_digit`` | - +-------------------------------------+-----------------------------------------------+ - | :const:`sizeof_digit` | size in bytes of the C type used to | - | | represent a digit | - +-------------------------------------+-----------------------------------------------+ - | :const:`default_max_digits` | default value for :func:`sys.getintmaxdigits` | - +-------------------------------------+-----------------------------------------------+ - | :const:`max_digits_check_threshold` | minimum value value for | - | | :func:`sys.setintmaxdigits` | - +-------------------------------------+-----------------------------------------------+ + +----------------------------------------+-----------------------------------------------+ + | Attribute | Explanation | + +========================================+===============================================+ + | :const:`bits_per_digit` | number of bits held in each digit. Python | + | | integers are stored internally in base | + | | ``2**int_info.bits_per_digit`` | + +----------------------------------------+-----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +----------------------------------------+-----------------------------------------------+ + | :const:`default_max_base10_digits` | default value for | + | | :func:`sys.get_int_max_base10_digits` when it | + | | is not otherwise explicitly configured. | + +----------------------------------------+-----------------------------------------------+ + | :const:`base10_digits_check_threshold` | minimum non-zero value for | + | | :func:`sys.set_int_max_base10_digits`, | + | | :envvar:`PYTHONINTMAXBASE10DIGITS`, or | + | | :option:`-X int_max_base10_digits <-X>`. | + | | Supplied positive values less than this will | + | | be silently rounded up to this value. 
| + +----------------------------------------+-----------------------------------------------+ .. versionadded:: 3.1 .. versionchanged:: 3.11 - Added ``default_max_digits`` and ``max_digits_check_threshold``. + Added ``default_max_base10_digits`` and ``base10_digits_check_threshold``. .. data:: __interactivehook__ @@ -1327,10 +1333,10 @@ always available. .. availability:: Unix. -.. function:: setintmaxdigits(n) +.. function:: set_int_max_base10_digits(n) Set global interpreter limit for :ref:`int maximum digits limitation - `. See also :func:`getintmaxdigits` + `. See also :func:`get_int_max_base10_digits` .. versionadded:: 3.11 diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 6888efcae45fd9..ffa90052568bf6 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -505,13 +505,13 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. - * ``-X intmaxdigits`` configures :ref:`int maximum digits limitation - `. See also :envvar:`PYTHONPROFILEIMPORTTIME`. + * ``-X int_max_base10_digits`` configures :ref:`int maximum digits limitation + `. See also :envvar:`PYTHONINTMAXBASE10DIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded application. Typical usage is ``python3 -X importtime -c 'import - asyncio'``. See also :envvar:`PYTHONINTMAXDIGITS`. + asyncio'``. See also :envvar:`PYTHONPROFILEIMPORTTIME`. * ``-X dev``: enable :ref:`Python Development Mode `, introducing additional runtime checks that are too expensive to be enabled by default. @@ -584,6 +584,9 @@ Miscellaneous options .. versionadded:: 3.11 The ``-X frozen_modules`` option. + .. versionadded:: 3.11 + The ``-X int_max_base10_digits`` option. + .. 
versionadded:: 3.12 The ``-X perf`` option. @@ -765,10 +768,10 @@ conflict. .. versionadded:: 3.2.3 -.. envvar:: PYTHONINTMAXDIGITS +.. envvar:: PYTHONINTMAXBASE10DIGITS If this variable is set to an integer, it is used to configure the interpreter's - global :ref:`int maximum digits limitation `. + global :ref:`int maximum digits limitation `. .. versionadded:: 3.11 diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 1f58c10637a1a7..60dc2d2766381e 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -178,6 +178,7 @@ typedef struct PyConfig { wchar_t *check_hash_pycs_mode; int use_frozen_modules; int safe_path; + int int_max_base10_digits; // NOTE(gpshead): do not backport to stable releases due to struct change. /* --- Path configuration inputs ------------ */ int pathconfig_warnings; @@ -218,10 +219,6 @@ typedef struct PyConfig { // If non-zero, we believe we're running from a source tree. int _is_python_build; - - /* global limit for long digits */ - Py_ssize_t intmaxdigits; - } PyConfig; PyAPI_FUNC(void) PyConfig_InitPythonConfig(PyConfig *config); diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index c8e8c35790d68b..bf3ad9ebe259b6 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -176,7 +176,7 @@ struct _is { struct types_state types; struct callable_cache callable_cache; - Py_ssize_t intmaxdigits; + int int_max_base10_digits; /* The following fields are here to avoid allocation during init. The data is exposed through PyInterpreterState pointer fields. diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 67dd5c3b13ec59..c4c2ac41ab9acb 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -11,6 +11,34 @@ extern "C" { #include "pycore_global_objects.h" // _PY_NSMALLNEGINTS #include "pycore_runtime.h" // _PyRuntime +/* + * Default int base conversion size limitation. 
+ * + * Chosen such that this isn't wildly slow on modern hardware: + * % python -m timeit -s 's = "1"*2000; v = int(s)' 'str(int(s))' + * 2000 loops, best of 5: 100 usec per loop + * + * 2000 decimal digits fits a ~6643 bit number. + */ +#define _PY_LONG_DEFAULT_MAX_BASE10_DIGITS 2000 +/* + * Threshold for max digits check. For performance reasons int() and + * int.__str__ don't checks values that are smaller than this + * threshold. Acts as a guaranteed minimum size limit for bignums that + * applications can expect from CPython. + * + * % python -m timeit -s 's = "1"*333; v = int(s)' 'str(int(s))' + * 100000 loops, best of 5: 3.94 usec per loop + * + * 333 decimal digits fits a ~1106 bit number. + */ +#define _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD 333 + +#if ((_PY_LONG_DEFAULT_MAX_BASE10_DIGITS != 0) && \ + (_PY_LONG_DEFAULT_MAX_BASE10_DIGITS < _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)) +# error "_PY_LONG_DEFAULT_MAX_BASE10_DIGITS smaller than threshold." +#endif + /* runtime lifecycle */ diff --git a/Include/longobject.h b/Include/longobject.h index f3c6803eeb2bdb..e559e238ae5a35 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -71,16 +71,6 @@ PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLong(PyObject *); PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLongMask(PyObject *); PyAPI_FUNC(long long) PyLong_AsLongLongAndOverflow(PyObject *, int *); -/* Default limitation */ -#ifndef _PY_LONG_DEFAULT_MAX_DIGITS -# define _PY_LONG_DEFAULT_MAX_DIGITS 0 -#endif -/* Threshold for max digits check. For performance reasons int() and - int.__str__ don't checks values that are smaller than the - threshold. For common cases it avoids a lookup of the interpreter - state in a hot path */ -#define _PY_LONG_MAX_DIGITS_THRESHOLD 1024 - PyAPI_FUNC(PyObject *) PyLong_FromString(const char *, char **, int); /* These aren't really part of the int object, but they're handy. 
The diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 241ca2f6404a44..b5952798e36cfb 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2344,12 +2344,11 @@ def sleeping_retry(timeout, err_msg=None, /, @contextlib.contextmanager -def setintmaxdigits(maxdigits): - """Set integer max digits limit - """ - current = sys.getintmaxdigits() +def set_int_max_base10_digits(max_digits): + """Temporarily change the integer maximum base 10 digits limit.""" + current = sys.get_int_max_base10_digits() try: - sys.setintmaxdigits(maxdigits) + sys.set_int_max_base10_digits(max_digits) yield finally: - sys.setintmaxdigits(current) + sys.set_int_max_base10_digits(current) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 1c5ccf7db8c195..5a4985a534a956 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -866,35 +866,36 @@ def test_parsing_error(self): self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) self.assertNotEqual(proc.returncode, 0) - def test_intmaxdigits(self): - code = "import sys; print(sys.flags.intmaxdigits, sys.getintmaxdigits())" + def test_int_max_base10_digits(self): + code = "import sys; print(sys.flags.int_max_base10_digits, sys.get_int_max_base10_digits())" - assert_python_failure('-X', 'intmaxdigits', '-c', code) - assert_python_failure('-X', 'intmaxdigits=foo', '-c', code) - assert_python_failure('-X', 'intmaxdigits=100', '-c', code) + assert_python_failure('-X', 'int_max_base10_digits', '-c', code) + assert_python_failure('-X', 'int_max_base10_digits=foo', '-c', code) + assert_python_failure('-X', 'int_max_base10_digits=100', '-c', code) - assert_python_failure('-c', code, PYTHONINTMAXDIGITS='foo') - assert_python_failure('-c', code, PYTHONINTMAXDIGITS='100') + assert_python_failure('-c', code, PYTHONINTMAXBASE10DIGITS='foo') + assert_python_failure('-c', code, PYTHONINTMAXBASE10DIGITS='100') def res2int(res): out = res.out.strip().decode("utf-8") 
return tuple(int(i) for i in out.split()) res = assert_python_ok('-c', code) - self.assertEqual(res2int(res), (-1, sys.getintmaxdigits())) - res = assert_python_ok('-X', 'intmaxdigits=0', '-c', code) + self.assertEqual(res2int(res), (-1, sys.get_int_max_base10_digits())) + res = assert_python_ok('-X', 'int_max_base10_digits=0', '-c', code) self.assertEqual(res2int(res), (0, 0)) - res = assert_python_ok('-X', 'intmaxdigits=4000', '-c', code) + res = assert_python_ok('-X', 'int_max_base10_digits=4000', '-c', code) self.assertEqual(res2int(res), (4000, 4000)) - res = assert_python_ok('-X', 'intmaxdigits=100000', '-c', code) + res = assert_python_ok('-X', 'int_max_base10_digits=100000', '-c', code) self.assertEqual(res2int(res), (100000, 100000)) - res = assert_python_ok('-c', code, PYTHONINTMAXDIGITS='0') + res = assert_python_ok('-c', code, PYTHONINTMAXBASE10DIGITS='0') self.assertEqual(res2int(res), (0, 0)) - res = assert_python_ok('-c', code, PYTHONINTMAXDIGITS='4000') + res = assert_python_ok('-c', code, PYTHONINTMAXBASE10DIGITS='4000') self.assertEqual(res2int(res), (4000, 4000)) res = assert_python_ok( - '-X', 'intmaxdigits=6000', '-c', code, PYTHONINTMAXDIGITS='4000' + '-X', 'int_max_base10_digits=6000', '-c', code, + PYTHONINTMAXBASE10DIGITS='4000' ) self.assertEqual(res2int(res), (6000, 6000)) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 7c5964e3d5535d..e8ee0e1d7a8afd 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -2526,6 +2526,15 @@ class CUsabilityTest(UsabilityTest): class PyUsabilityTest(UsabilityTest): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_base10_digits() + sys.set_int_max_base10_digits(7000) + + def tearDown(self): + sys.set_int_max_base10_digits(self._previous_int_limit) + super().tearDown() + class PythonAPItests(unittest.TestCase): def test_abc(self): @@ -4626,6 +4635,15 @@ class CCoverage(Coverage): class PyCoverage(Coverage): decimal = P + def 
setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_base10_digits() + sys.set_int_max_base10_digits(7000) + + def tearDown(self): + sys.set_int_max_base10_digits(self._previous_int_limit) + super().tearDown() + class PyFunctionality(unittest.TestCase): """Extra functionality in decimal.py""" diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index ed13c2a50a7374..e36057ffaa38c7 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -434,7 +434,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'install_signal_handlers': 1, 'use_hash_seed': 0, 'hash_seed': 0, - 'intmaxdigits': -1, + 'int_max_base10_digits': -1, 'faulthandler': 0, 'tracemalloc': 0, 'perf_profiling': 0, diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 8f387a6ae84b6e..ccc782a22e37d1 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,3 +1,4 @@ +from math import log import sys import unittest @@ -26,7 +27,6 @@ ("\u0200", ValueError) ] - class IntSubclass(int): pass @@ -577,57 +577,108 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7_8_9', 16), 0x123456789) self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) - def _test_maxdigits(self, c): - maxdigits = sys.getintmaxdigits() - if maxdigits != 0: - # edge cases - c('1' * maxdigits) - c(' ' + '1' * maxdigits) - c('+' + '1' * maxdigits) - self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) - - # disable limitation - with support.setintmaxdigits(0): - i = c('1' * 100_000) + +class IntBase10DigitLimitsTests(unittest.TestCase): + + int_class = int # Override this in subclasses to reuse the suite. 
+ + def setUp(self): + super().setUp() + self._previous_limit = sys.get_int_max_base10_digits() + sys.set_int_max_base10_digits(2048) + + def tearDown(self): + sys.set_int_max_base10_digits(self._previous_limit) + super().tearDown() + + def test_disabled_limit(self): + self.assertGreater(sys.get_int_max_base10_digits(), 0) + self.assertLess(sys.get_int_max_base10_digits(), 20_000) + with support.set_int_max_base10_digits(0): + self.assertEqual(sys.get_int_max_base10_digits(), 0) + i = self.int_class('1' * 20_000) str(i) + self.assertGreater(sys.get_int_max_base10_digits(), 0) - def check(i, base=None): - with self.assertRaises(ValueError): - if base is None: - c(i) - else: - c(i, base) + def test_max_base10_digits_edge_cases(self): + """Ignore the +/- sign and space padding.""" + int_class = self.int_class + maxdigits = sys.get_int_max_base10_digits() - maxdigits = 2048 - with support.setintmaxdigits(maxdigits): - assert maxdigits == sys.getintmaxdigits() - check('1' * (maxdigits + 1)) - check('+' + '1' * (maxdigits + 1)) - check('1' * (maxdigits + 1)) + int_class('1' * maxdigits) + int_class(' ' + '1' * maxdigits) + int_class('1' * maxdigits + ' ') + int_class('+' + '1' * maxdigits) + int_class('-' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + + def check(self, i, base=None): + with self.assertRaises(ValueError): + if base is None: + self.int_class(i) + else: + self.int_class(i, base) + + def test_max_base10_digits(self): + maxdigits = sys.get_int_max_base10_digits() + + self.check('1' * (maxdigits + 1)) + self.check(' ' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1) + ' ') + self.check('+' + '1' * (maxdigits + 1)) + self.check('-' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1)) + + i = 10 ** maxdigits + with self.assertRaises(ValueError): + str(i) + + def test_power_of_two_bases_unlimited(self): + """The limit does not apply to power of 2 bases.""" + maxdigits = sys.get_int_max_base10_digits() + + 
for base in (2, 4, 8, 16, 32): + with self.subTest(base=base): + self.int_class('1' * (maxdigits + 1), base) + assert maxdigits < 100_000 + self.int_class('1' * 100_000, base) + + def test_underscores_ignored(self): + """The limit ignores underscore separators.""" + maxdigits = sys.get_int_max_base10_digits() + + s = '1111_' * ((maxdigits) // 4) + s = s[:-1] + self.int_class(s) + self.check(s + '1') + + def _other_base_helper(self, base): + int_class = self.int_class + max_digits = sys.get_int_max_base10_digits() + + base_digits = int(max_digits*log(10)/log(base)) + if base > 10: + assert base_digits < max_digits + elif base < 10: + assert base_digits > max_digits + s = '3' * base_digits + self.assertLessEqual(len(str(int_class(s, base))), max_digits) + int_class(f'{s}1', base) + with self.assertRaises(ValueError) as err: + int_class(f'{s}11', base) + + def test_int_from_other_bases(self): + """The limit should scale as an equivalent number of decimal digits.""" + with self.subTest(base=9): + self._other_base_helper(9) + + with self.subTest(base=36): + self._other_base_helper(36) + + +class IntSubclassBase10DigitLimitsTests(IntBase10DigitLimitsTests): + int_class = IntSubclass - i = 10 ** maxdigits - with self.assertRaises(ValueError): - str(i) - - # ignore power of two - for base in (2, 4, 8, 16, 32): - c('1' * (maxdigits + 1), base) - c('1' * 100_000, base) - - # limit ignores underscores - s = '1111_' * ((maxdigits) // 4) - s = s[:-1] - int(s) - check(s + '1') - - # limit is in equivalent of base 10 digits - s = '1' * 2147 - assert len(str(int(s, 9))) == maxdigits - int(s + '1', 9) - - def test_maxdigits(self): - self._test_maxdigits(int) - self._test_maxdigits(IntSubclass) if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 861ee88e76febd..aa8d52f99be33f 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -99,7 +99,7 @@ def 
test_negative_index(self): def test_limit_int(self): maxdigits = 5000 - with support.setintmaxdigits(maxdigits): + with support.set_int_max_base10_digits(maxdigits): self.loads('1' * maxdigits) with self.assertRaises(ValueError): self.loads('1' * (maxdigits + 1)) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index b73f697b1dae54..2432f7cac567a8 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -553,12 +553,14 @@ def test_attributes(self): self.assertEqual(len(sys.int_info), 4) self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) self.assertTrue(sys.int_info.sizeof_digit >= 1) - self.assertGreaterEqual(sys.int_info.default_max_digits, 0) - self.assertGreaterEqual(sys.int_info.max_digits_check_threshold, 0) + self.assertGreaterEqual(sys.int_info.default_max_base10_digits, 500) + self.assertGreaterEqual(sys.int_info.base10_digits_check_threshold, 100) + self.assertGreater(sys.int_info.default_max_base10_digits, + sys.int_info.base10_digits_check_threshold) self.assertEqual(type(sys.int_info.bits_per_digit), int) self.assertEqual(type(sys.int_info.sizeof_digit), int) - self.assertIsInstance(sys.int_info.default_max_digits, int) - self.assertIsInstance(sys.int_info.max_digits_check_threshold, int) + self.assertIsInstance(sys.int_info.default_max_base10_digits, int) + self.assertIsInstance(sys.int_info.base10_digits_check_threshold, int) self.assertIsInstance(sys.hexversion, int) self.assertEqual(len(sys.hash_info), 9) @@ -681,7 +683,7 @@ def test_sys_flags(self): "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", "hash_randomization", "isolated", "dev_mode", "utf8_mode", - "warn_default_encoding", "safe_path", "intmaxdigits") + "warn_default_encoding", "safe_path", "int_max_base10_digits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr in ("dev_mode", "safe_path") else int diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 
eba55c8b725946..0776f2265f9dd3 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -295,7 +295,7 @@ def test_limit_int(self): check('123456780123456789', None) with self.assertRaises(ValueError): maxdigits = 5000 - with support.setintmaxdigits(maxdigits): + with support.set_int_max_base10_digits(maxdigits): s = '1' * (maxdigits + 1) check(f'{s}', None) diff --git a/Objects/longobject.c b/Objects/longobject.c index f48c0f7a350338..e26c48b17d0c4f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -13,6 +13,7 @@ #include #include +#include #include #include // abs() @@ -36,6 +37,8 @@ medium_value(PyLongObject *x) #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS) +#define _MAX_BASE10_DIGITS_ERROR_FMT "Exceeds digit limit for string conversions: Value uses approximately %zd base 10 digits." + static inline void _Py_DECREF_INT(PyLongObject *op) { @@ -1815,12 +1818,14 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } - if (strlen > _PY_LONG_MAX_DIGITS_THRESHOLD) { + if (strlen > _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if ((interp->intmaxdigits > 0) && (strlen > interp->intmaxdigits)) { + int max_base10_digits = interp->int_max_base10_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_base10_digits > 0) && (strlen_nosign > max_base10_digits)) { Py_DECREF(scratch); - PyErr_SetString(PyExc_ValueError, - "input exceeds maximum integer digit limit"); + PyErr_Format(PyExc_ValueError, _MAX_BASE10_DIGITS_ERROR_FMT, + strlen_nosign); return -1; } } @@ -2268,6 +2273,38 @@ long_from_binary_base(const char **str, int base, PyLongObject **res) return 0; } +/* + * A helper function to precompute values in a small static log table used + * in PyLong_String.
The caller should check if the table is already filled + * in (non-zero value) at the [base] index before calling. + * + * Appropriate values in log_base_table, convwidth_base, and convmultmax_base + * are computed and filled in at index [base]. + */ +static void fill_in_log_conversion_table( + int base, + double *log_base_table, + int *convwidth_base, + twodigits *convmultmax_base) +{ + twodigits convmax = base; + int i = 1; + + log_base_table[base] = (log((double)base) / + log((double)PyLong_BASE)); + for (;;) { + twodigits next = convmax * base; + if (next > PyLong_BASE) { + break; + } + convmax = next; + ++i; + } + convmultmax_base[base] = convmax; + assert(i > 0); + convwidth_base[base] = i; +} + /* Parses an int from a bytestring. Leading and trailing whitespace will be * ignored. * @@ -2276,7 +2313,6 @@ long_from_binary_base(const char **str, int base, PyLongObject **res) * * If unsuccessful, NULL will be returned. */ - PyObject * PyLong_FromString(const char *str, char **pend, int base) { @@ -2338,7 +2374,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { - /* binary bases are not limited by intmaxdigits */ + /* binary bases are not limited by int_max_base10_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2433,7 +2469,7 @@ digit beyond the first. ***/ twodigits c; /* current input character */ Py_ssize_t size_z; - Py_ssize_t digits = 0; + Py_ssize_t digits = 0; // Number of base $base digits in str. Py_ssize_t underscores = 0; int i; int convwidth; @@ -2447,22 +2483,8 @@ digit beyond the first. 
static twodigits convmultmax_base[37] = {0,}; if (log_base_BASE[base] == 0.0) { - twodigits convmax = base; - int i = 1; - - log_base_BASE[base] = (log((double)base) / - log((double)PyLong_BASE)); - for (;;) { - twodigits next = convmax * base; - if (next > PyLong_BASE) { - break; - } - convmax = next; - ++i; - } - convmultmax_base[base] = convmax; - assert(i > 0); - convwidth_base[base] = i; + fill_in_log_conversion_table( + base, log_base_BASE, convwidth_base, convmultmax_base); } /* Find length of the string of numeric characters. */ @@ -2492,25 +2514,48 @@ digit beyond the first. goto onError; } - /* intmaxdigits limit ignores underscores and uses base 10 - * as reference point. - * For other bases slen is transformed into base 10 equivalents. - * Our string to integer conversion algorithm scales less than - * linear with base value, for example int('1' * 300_000", 30) - * is slightly more than five times slower than int(..., 5). - * The naive scaling "slen / 10 * base" is close enough to - * compensate. + /* The int_max_base10_digits limit ignores underscores. + * + * We compute the worst case number of base 10 digits to + * represent a $digits length number of base $base. The number + * isn't precise. It could be off by 1 and will be imprecise + * for a million digits but is close enough our needs. */ - slen = scan - str - underscores; - if (base != 10) { - slen = (Py_ssize_t)(slen / 10 * base); - } - if (slen > _PY_LONG_MAX_DIGITS_THRESHOLD) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - if ((interp->intmaxdigits > 0 ) && (slen > interp->intmaxdigits)) { - PyErr_SetString(PyExc_ValueError, - "input exceeds maximum integer digit limit"); - return NULL; + { + Py_ssize_t num_decimal_digits; + if (base == 10) { + num_decimal_digits = digits; + } + else // base != 10 && != 2,4,8,16,32 either; those were handled. + { + // float to save space, limiting doesn't require high precision. + // Limits beyond 1,000,000 may be imprecise as a result. 
+ static float decimal_digits_per_base_digits_ratio[37] = {0.0e0,}; + if (decimal_digits_per_base_digits_ratio[base] == 0.0) { + if (log_base_BASE[10] == 0.0) { + fill_in_log_conversion_table( + 10, log_base_BASE, convwidth_base, convmultmax_base); + } + double log_10 = log_base_BASE[10]; + // The log table was already filled in for this earlier. + assert(log_base_BASE[base] != 0.0); + double log_base = log_base_BASE[base]; + decimal_digits_per_base_digits_ratio[base] = ( + log_base / log_10); + } + // digits * log_base / log_10 + num_decimal_digits = (Py_ssize_t)floorf( + (float)digits * decimal_digits_per_base_digits_ratio[base]); + } + if (num_decimal_digits > _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_base10_digits = interp->int_max_base10_digits; + if ((max_base10_digits > 0) && + (num_decimal_digits > max_base10_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_BASE10_DIGITS_ERROR_FMT, + num_decimal_digits); + return NULL; + } } } @@ -5394,16 +5439,15 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) if (obase == NULL) return PyNumber_Long(x); - if (obase != NULL) { - base = PyNumber_AsSsize_t(obase, NULL); - if (base == -1 && PyErr_Occurred()) - return NULL; - if ((base != 0 && base < 2) || base > 36) { - PyErr_SetString(PyExc_ValueError, - "int() base must be >= 2 and <= 36, or 0"); - return NULL; - } + base = PyNumber_AsSsize_t(obase, NULL); + if (base == -1 && PyErr_Occurred()) + return NULL; + if ((base != 0 && base < 2) || base > 36) { + PyErr_SetString(PyExc_ValueError, + "int() base must be >= 2 and <= 36, or 0"); + return NULL; } + if (PyUnicode_Check(x)) return PyLong_FromUnicodeObject(x, (int)base); else if (PyByteArray_Check(x) || PyBytes_Check(x)) { @@ -6127,8 +6171,8 @@ internal representation of integers. 
The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, - {"default_max_digits", "maximum digits limitation"}, - {"max_digits_check_threshold", "minimum threshold to check for max digits"}, + {"default_max_base10_digits", "maximum base 10 digits limitation"}, + {"base10_digits_check_threshold", "minimum positive value for sys.set_int_max_base10_digits()"}, {NULL, NULL} }; @@ -6151,10 +6195,17 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + /* + * The following two fields were added after investigating uses of + * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was + * numba using sys.int_info.bits_per_digit as attribute access rather than + * sequence unpacking. Cython and sympy also refer to sys.int_info but only + * as info for debugging. No concern about adding these in a backport. 
+ */ PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(_PY_LONG_DEFAULT_MAX_DIGITS)); + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_BASE10_DIGITS)); PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(_PY_LONG_MAX_DIGITS_THRESHOLD)); + PyLong_FromLong(_PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -6182,9 +6233,9 @@ _PyLong_InitTypes(PyInterpreterState *interp) return _PyStatus_ERR("can't init int info type"); } } - interp->intmaxdigits = _PyInterpreterState_GetConfig(interp)->intmaxdigits; - if (interp->intmaxdigits == -1) { - interp->intmaxdigits = _PY_LONG_DEFAULT_MAX_DIGITS; + interp->int_max_base10_digits = _PyInterpreterState_GetConfig(interp)->int_max_base10_digits; + if (interp->int_max_base10_digits == -1) { + interp->int_max_base10_digits = _PY_LONG_DEFAULT_MAX_BASE10_DIGITS; } return _PyStatus_OK(); diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 4e20f703721219..e94bbfb06fffe7 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -745,62 +745,54 @@ sys_mdebug(PyObject *module, PyObject *arg) #endif /* defined(USE_MALLOPT) */ -PyDoc_STRVAR(sys_getintmaxdigits__doc__, -"getintmaxdigits($module, /)\n" +PyDoc_STRVAR(sys_get_int_max_base10_digits__doc__, +"get_int_max_base10_digits($module, /)\n" "--\n" "\n" -"Get value of integer maximum digits limit."); +"Set the integer maximum decimal digits limit used on int<->str conversions."); -#define SYS_GETINTMAXDIGITS_METHODDEF \ - {"getintmaxdigits", (PyCFunction)sys_getintmaxdigits, METH_NOARGS, sys_getintmaxdigits__doc__}, +#define SYS_GET_INT_MAX_BASE10_DIGITS_METHODDEF \ + {"get_int_max_base10_digits", (PyCFunction)sys_get_int_max_base10_digits, METH_NOARGS, sys_get_int_max_base10_digits__doc__}, static PyObject * -sys_getintmaxdigits_impl(PyObject *module); +sys_get_int_max_base10_digits_impl(PyObject *module); static PyObject * -sys_getintmaxdigits(PyObject *module, PyObject 
*Py_UNUSED(ignored)) +sys_get_int_max_base10_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return sys_getintmaxdigits_impl(module); + return sys_get_int_max_base10_digits_impl(module); } -PyDoc_STRVAR(sys_setintmaxdigits__doc__, -"setintmaxdigits($module, /, maxdigits)\n" +PyDoc_STRVAR(sys_set_int_max_base10_digits__doc__, +"set_int_max_base10_digits($module, /, maxdigits)\n" "--\n" "\n" -"Set value of integer maximum digits limit."); +"Set the integer maximum decimal digits limit used on int<->str conversions."); -#define SYS_SETINTMAXDIGITS_METHODDEF \ - {"setintmaxdigits", _PyCFunction_CAST(sys_setintmaxdigits), METH_FASTCALL|METH_KEYWORDS, sys_setintmaxdigits__doc__}, +#define SYS_SET_INT_MAX_BASE10_DIGITS_METHODDEF \ + {"set_int_max_base10_digits", _PyCFunction_CAST(sys_set_int_max_base10_digits), METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_base10_digits__doc__}, static PyObject * -sys_setintmaxdigits_impl(PyObject *module, Py_ssize_t maxdigits); +sys_set_int_max_base10_digits_impl(PyObject *module, int maxdigits); static PyObject * -sys_setintmaxdigits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +sys_set_int_max_base10_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; static const char * const _keywords[] = {"maxdigits", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "setintmaxdigits", 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_base10_digits", 0}; PyObject *argsbuf[1]; - Py_ssize_t maxdigits; + int maxdigits; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { goto exit; } - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[0]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - maxdigits = ival; + maxdigits = _PyLong_AsInt(args[0]); + if (maxdigits == -1 && 
PyErr_Occurred()) { + goto exit; } - return_value = sys_setintmaxdigits_impl(module, maxdigits); + return_value = sys_set_int_max_base10_digits_impl(module, maxdigits); exit: return return_value; @@ -1328,4 +1320,4 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=6077bf7ef08d3318 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5539c6096dc5eab5 input=a9049054013a1b77]*/ diff --git a/Python/initconfig.c b/Python/initconfig.c index e3b9810cb1e902..cf27f3aa37909f 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3,6 +3,7 @@ #include "pycore_getopt.h" // _PyOS_GetOpt() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _PyInterpreterState.runtime +#include "pycore_long.h" // _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD #include "pycore_pathconfig.h" // _Py_path_config #include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pylifecycle.h" // _Py_PreInitializeFromConfig() @@ -126,7 +127,9 @@ The following implementation-specific options are available:\n\ -X frozen_modules=[on|off]: whether or not frozen modules should be used.\n\ The default is \"on\" (or \"off\" if you are running a local build).\n\ \n\ --X intmaxdigits=number: limit maximum digits ints."; +-X int_max_base10_digits=number: limit the size of int<->str conversions.\n\ + This helps avoid denial of service attacks when parsing untrusted data.\n\ + The default is sys.int_info.default_max_base10_digits. 0 disables."; /* Envvars that don't have equivalent command-line options are listed first */ static const char usage_envvars[] = @@ -146,7 +149,10 @@ static const char usage_envvars[] = " to seed the hashes of str and bytes objects. 
It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" -"PYTHONINTMAXDIGITS: limt maximum digits when converting from or to int\n" +"PYTHONINTMAXBASE10DIGITS: limits the maximum decimal digits in an int value\n" +" when converting from a string and when converting an int back to a str.\n" +" A value of 0 disables the limit. Conversions from power of two number\n" +" bases are never limited.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" @@ -785,8 +791,8 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->safe_path = 0; config->_is_python_build = 0; config->code_debug_ranges = 1; - /* config_init_intmaxdigits() sets default limit */ - config->intmaxdigits = -1; + /* config_init_int_max_base10_digits() sets default limit */ + config->int_max_base10_digits = -1; } @@ -1013,7 +1019,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(safe_path); COPY_WSTRLIST(orig_argv); COPY_ATTR(_is_python_build); - COPY_ATTR(intmaxdigits); + COPY_ATTR(int_max_base10_digits); #undef COPY_ATTR #undef COPY_WSTR_ATTR @@ -1121,7 +1127,7 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_INT(use_frozen_modules); SET_ITEM_INT(safe_path); SET_ITEM_INT(_is_python_build); - SET_ITEM_INT(intmaxdigits); + SET_ITEM_INT(int_max_base10_digits); return dict; @@ -1770,38 +1776,47 @@ config_init_tracemalloc(PyConfig *config) } static PyStatus -config_init_intmaxdigits(PyConfig *config) +config_init_int_max_base10_digits(PyConfig *config) { int maxdigits; int valid = 0; /* default to unconfigured, _PyLong_InitTypes() does the rest */ - config->intmaxdigits = -1; + config->int_max_base10_digits = -1; - const char *env = config_get_env(config, "PYTHONINTMAXDIGITS"); + const char *env = config_get_env(config, "PYTHONINTMAXBASE10DIGITS"); if (env) { if (!_Py_str_to_int(env, &maxdigits)) { - valid = 
((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_THRESHOLD)); + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)); } if (!valid) { - return _PyStatus_ERR("PYTHONINTMAXDIGITS: invalid limit"); +#define STRINGIFY(VAL) _STRINGIFY(VAL) +#define _STRINGIFY(VAL) #VAL + return _PyStatus_ERR( + "PYTHONINTMAXBASE10DIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) + " or 0 for unlimited."); } - config->intmaxdigits = maxdigits; + config->int_max_base10_digits = maxdigits; } - const wchar_t *xoption = config_get_xoption(config, L"intmaxdigits"); + const wchar_t *xoption = config_get_xoption(config, L"int_max_base10_digits"); if (xoption) { const wchar_t *sep = wcschr(xoption, L'='); if (sep) { if (!config_wstr_to_int(sep + 1, &maxdigits)) { - valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_THRESHOLD)); + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)); } } if (!valid) { - return _PyStatus_ERR("-X intmaxdigits: " - "invalid limit"); + return _PyStatus_ERR( + "-X int_max_base10_digits: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) + " or 0 for unlimited."); +#undef _STRINGIFY +#undef STRINGIFY } - config->intmaxdigits = maxdigits; + config->int_max_base10_digits = maxdigits; } return _PyStatus_OK(); } @@ -1869,8 +1884,8 @@ config_read_complex_options(PyConfig *config) } } - if (config->intmaxdigits < 0) { - status = config_init_intmaxdigits(config); + if (config->int_max_base10_digits < 0) { + status = config_init_int_max_base10_digits(config); if (_PyStatus_EXCEPTION(status)) { return status; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index fd349d8ebd152a..3fb20b39f55332 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -20,6 +20,7 @@ Data members: #include "pycore_code.h" // _Py_QuickenedCount #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() 
+#include "pycore_long.h" // _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD #include "pycore_namespace.h" // _PyNamespace_New() #include "pycore_object.h" // _PyObject_IS_GC() #include "pycore_pathconfig.h" // _PyPathConfig_ComputeSysPath0() @@ -1673,39 +1674,39 @@ sys_mdebug_impl(PyObject *module, int flag) /*[clinic input] -sys.getintmaxdigits +sys.get_int_max_base10_digits -Get value of integer maximum digits limit. +Set the integer maximum decimal digits limit used on int<->str conversions. [clinic start generated code]*/ static PyObject * -sys_getintmaxdigits_impl(PyObject *module) -/*[clinic end generated code: output=be8245491b631377 input=4c6cf29e9858e10e]*/ +sys_get_int_max_base10_digits_impl(PyObject *module) +/*[clinic end generated code: output=1b56ca97b75c4c7d input=af480955a120eb99]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - return PyLong_FromSsize_t(interp->intmaxdigits); + return PyLong_FromSsize_t(interp->int_max_base10_digits); } /*[clinic input] -sys.setintmaxdigits +sys.set_int_max_base10_digits - maxdigits: Py_ssize_t + maxdigits: int -Set value of integer maximum digits limit. +Set the integer maximum decimal digits limit used on int<->str conversions. 
[clinic start generated code]*/ static PyObject * -sys_setintmaxdigits_impl(PyObject *module, Py_ssize_t maxdigits) -/*[clinic end generated code: output=f08310ce0abd3fc7 input=66814100429a2b99]*/ +sys_set_int_max_base10_digits_impl(PyObject *module, int maxdigits) +/*[clinic end generated code: output=d0cc502962bdb9b5 input=52cf6736588172db]*/ { PyThreadState *tstate = _PyThreadState_GET(); - if ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_DIGITS_THRESHOLD)) { - tstate->interp->intmaxdigits = maxdigits; + if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)) { + tstate->interp->int_max_base10_digits = maxdigits; Py_RETURN_NONE; } else { PyErr_Format( - PyExc_ValueError, "maxdigits must be 0 or larger than %zd", - _PY_LONG_MAX_DIGITS_THRESHOLD); + PyExc_ValueError, "maxdigits must be 0 or larger than %d", + _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD); return NULL; } } @@ -2225,8 +2226,8 @@ static PyMethodDef sys_methods[] = { SYS_DEACTIVATE_STACK_TRAMPOLINE_METHODDEF SYS_IS_STACK_TRAMPOLINE_ACTIVE_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF - SYS_GETINTMAXDIGITS_METHODDEF - SYS_SETINTMAXDIGITS_METHODDEF + SYS_GET_INT_MAX_BASE10_DIGITS_METHODDEF + SYS_SET_INT_MAX_BASE10_DIGITS_METHODDEF #ifdef Py_STATS SYS__STATS_ON_METHODDEF SYS__STATS_OFF_METHODDEF @@ -2727,7 +2728,7 @@ static PyStructSequence_Field flags_fields[] = { {"utf8_mode", "-X utf8"}, {"warn_default_encoding", "-X warn_default_encoding"}, {"safe_path", "-P"}, - {"intmaxdigits", "-X intmaxdigits"}, + {"int_max_base10_digits", "-X int_max_base10_digits"}, {0} }; @@ -2776,7 +2777,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags) SetFlag(preconfig->utf8_mode); SetFlag(config->warn_default_encoding); SetFlagObj(PyBool_FromLong(config->safe_path)); - SetFlagObj(PyLong_FromSsize_t(config->intmaxdigits)); + SetFlag(config->int_max_base10_digits); #undef SetFlagObj #undef SetFlag return 0; From c90b79f6f1caa4f117fb775bdab9b3b5f075f16c Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google]" Date: Sun, 7 Aug 2022 20:43:44 +0000 Subject: [PATCH 12/45] Improve the configuring docs. --- Doc/library/stdtypes.rst | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 80e50f18ba2291..d1b3a6f8f93190 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5494,10 +5494,8 @@ When an operation exceeds the limit, a :exc:`ValueError` is raised:: Configuring the limit --------------------- -* :data:`sys.int_info.default_max_base10_digits` is the compiled-in default - limit. -* :data:`sys.int_info.base10_digits_check_threshold` is the minimum accepted - value for the limit. +Before Python starts up you can use an environment variable or an interpreter +command line flag to configure the limit: * :envvar:`PYTHONINTMAXBASE10DIGITS`, e.g. ``PYTHONINTMAXBASE10DIGITS=4321 python3`` to set the limit to ``4321`` or @@ -5509,13 +5507,23 @@ Configuring the limit :option:`-X int_max_base10_digits <-X>`. In case both the env var and the ``-X`` option are set, the ``-X`` option takes precedence. The value of *-1* indicates that both were unset and the value of - :data:`sys.int_info.default_max_base10_digits` will be used. + :data:`sys.int_info.default_max_base10_digits` was used during initilization. + +From code, you can inspect the current limit and set a new one using these +:mod:`sys` APIs: * :func:`sys.get_int_max_base10_digits` and :func:`sys.set_int_max_base10_digits` are a getter and setter for the interpreter-wide limit. Subinterpreters have their own limit. +Information about the default and minimum can be found in :attr:`sys.int_info`: + +* :data:`sys.int_info.default_max_base10_digits ` is the + compiled-in default limit. +* :data:`sys.int_info.base10_digits_check_threshold ` is the + minimum accepted value for the limit. + Affected APIs ------------- From fea25ea04afbed819c628a6bc2e4f406e7ee49b1 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google]" Date: Sun, 7 Aug 2022 22:20:22 +0000 Subject: [PATCH 13/45] Stop tying to base10, just use string digits. Renamed to `int_max_str_digits` and simplified the logic per @Y1hgs's comments on the earlier revision. Less code, less awkward, and simpler to explain. Underscores and the sign are uncounted because that makes for the easiest implementation. --- Doc/c-api/init_config.rst | 10 ++-- Doc/library/functions.rst | 10 ++-- Doc/library/json.rst | 2 +- Doc/library/stdtypes.rst | 57 ++++++++++----------- Doc/library/sys.rst | 28 +++++------ Doc/using/cmdline.rst | 10 ++-- Include/cpython/initconfig.h | 2 +- Include/internal/pycore_interp.h | 2 +- Include/internal/pycore_long.h | 10 ++-- Lib/test/support/__init__.py | 10 ++-- Lib/test/test_cmd_line.py | 30 +++++------ Lib/test/test_decimal.py | 12 ++--- Lib/test/test_embed.py | 2 +- Lib/test/test_int.py | 84 ++++++++++++++++++------------- Lib/test/test_json/test_decode.py | 2 +- Lib/test/test_sys.py | 14 +++--- Lib/test/test_xmlrpc.py | 2 +- Objects/longobject.c | 82 +++++++++--------------------- Python/clinic/sysmodule.c.h | 36 ++++++------- Python/initconfig.c | 44 ++++++++-------- Python/sysmodule.c | 34 ++++++------- 21 files changed, 228 insertions(+), 255 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index b356a2923d86e8..5a1433b623c392 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -828,13 +828,13 @@ PyConfig Default: ``0``. - .. c:member:: int int_max_base10_digits + .. c:member:: int int_max_str_digits - If greater than 0, enable int digit limitation. ``-1`` means that - :data:`sys.int_info.default_max_base10_digits` will be used. + If greater than 0, enable int conversion digit limitations. ``-1`` means + that :data:`sys.int_info.default_max_str_digits` will be used. - Configured by the :option:`-X int_max_base10_digits <-X>` command line - flag or the :envvar:`PYTHONINTMAXBASE10DIGITS` environment varable. 
+ Configured by the :option:`-X int_max_str_digits <-X>` command line + flag or the :envvar:`PYTHONINTMAXSTRDIGITS` environment varable. Default: ``-1``. diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7d06b321b8a555..eef0e2723cafde 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -911,10 +911,12 @@ are always available. They are listed here in alphabetical order. The delegation to :meth:`__trunc__` is deprecated. .. versionchanged:: 3.11 - :class:`int` string inputs and string representation can be limited - to help avoid denial of service attacks. A :exc:`ValueError` is raised - when an input or string representation exceeds the limit. See :ref:`int - maximum digits limitation ` for more information. + :class:`int` string inputs and string representations can be limited to + help avoid denial of service attacks. A :exc:`ValueError` is raised when + the limit is exceeded while converting a string *x* to an :class:`int` or + when converting a :class:`int` into a string would exceed the limit. See + :ref:`int maximum digits limitation ` for more + information. .. function:: isinstance(object, classinfo) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 3102cf6a0561a5..e094de18745ef7 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -261,7 +261,7 @@ Basic Usage .. versionchanged:: 3.11 The default implementation of *parse_int* limits the maximum length of the integer string via the interpreter's :ref:`int maximum digits - limitation ` mechanism to help avoid denial of + limitation ` mechanism to help avoid denial of service attacks. *parse_constant*, if specified, will be called with one of the following diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index d1b3a6f8f93190..98959d7fb0ec0b 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5460,7 +5460,7 @@ types, where they are relevant. Some of these are not reported by the [] -.. 
_int_max_base10_digits: +.. _int_max_str_digits: Integer maximum digits limitation ================================= @@ -5474,14 +5474,13 @@ power of *2*. Even the best known algorithms for base *10* have sub-quadratic complexity. Converting a large value such as ``int('1' * 500_000)`` can take over a second on a fast CPU. -The limit value uses base 10 as a reference point and scales with base. That -means an :class:`int` conversion accepts longer strings for smaller bases and -shorter strings for larger bases. Underscores and the sign in strings don't -count towards the limit. +The limit value is based on the number of digit characters in the input or +output string. That means that higher bases can process larger numbers before +the limit triggers. Underscores and the sign are not counted towards the limit. When an operation exceeds the limit, a :exc:`ValueError` is raised:: - >>> sys.set_int_max_base10_digits(2048) + >>> sys.set_int_max_str_digits(2048) >>> i = 10 ** 2047 >>> len(str(i)) 2048 @@ -5489,7 +5488,7 @@ When an operation exceeds the limit, a :exc:`ValueError` is raised:: >>> len(str(i)) Traceback (most recent call last): ... - ValueError: exceeds maximum integer base 10 digit limit + ValueError: Exceeds digit limit for string conversions: value has 2049 digits. Configuring the limit --------------------- @@ -5497,32 +5496,30 @@ Configuring the limit Before Python starts up you can use an environment variable or an interpreter command line flag to configure the limit: -* :envvar:`PYTHONINTMAXBASE10DIGITS`, e.g. - ``PYTHONINTMAXBASE10DIGITS=4321 python3`` to set the limit to ``4321`` or - ``PYTHONINTMAXBASE10DIGITS=0 python3`` to disable the limitation. -* :option:`-X int_max_base10_digits <-X>`, e.g. - ``python3 -X int_max_base10_digits=4321`` -* :data:`sys.flags.int_max_base10_digits` contains the value of - :envvar:`PYTHONINTMAXBASE10DIGITS` or - :option:`-X int_max_base10_digits <-X>`. 
In case both the env var and the - ``-X`` option are set, the ``-X`` option takes precedence. The value of - *-1* indicates that both were unset and the value of - :data:`sys.int_info.default_max_base10_digits` was used during initilization. +* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. + ``PYTHONINTMAXSTRDIGITS=4321 python3`` to set the limit to ``4321`` or + ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. +* :option:`-X int_max_str_digits <-X>`, e.g. + ``python3 -X int_max_str_digits=4321`` +* :data:`sys.flags.int_max_str_digits` contains the value of + :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`. + If both the env var and the ``-X`` option are set, the ``-X`` option takes + precedence. A value of *-1* indicates that both were unset, thus a value of + :data:`sys.int_info.default_max_str_digits` was used during initilization. From code, you can inspect the current limit and set a new one using these :mod:`sys` APIs: -* :func:`sys.get_int_max_base10_digits` and - :func:`sys.set_int_max_base10_digits` are a getter and setter for - the interpreter-wide limit. Subinterpreters have their own - limit. +* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are + a getter and setter for the interpreter-wide limit. Subinterpreters have + their own limit. Information about the default and minimum can be found in :attr:`sys.int_info`: -* :data:`sys.int_info.default_max_base10_digits ` is the - compiled-in default limit. -* :data:`sys.int_info.base10_digits_check_threshold ` is the - minimum accepted value for the limit. +* :data:`sys.int_info.default_max_str_digits ` is the compiled-in + default limit. +* :data:`sys.int_info.str_digits_check_threshold ` is the minimum + accepted value for the limit. 
Affected APIs ------------- @@ -5549,17 +5546,17 @@ The limitations do not apply to functions with a linear algorithm: Recommended configuration ------------------------- -The default :data:`sys.int_info.default_max_base10_digits` is expected to be +The default :data:`sys.int_info.default_max_str_digits` is expected to be reasonable for most applications. If your application requires a different limit, use Python version and implementation agnostic code to set it. Example:: >>> import sys - >>> if hasattr(sys, "set_int_max_base10_digits"): - ... current_limit = sys.get_int_max_base10_digits() + >>> if hasattr(sys, "set_int_max_str_digits"): + ... current_limit = sys.get_int_max_str_digits() ... if not current_limit or current_limit > 4321: - ... sys.set_int_max_base10_digits(4321) + ... sys.set_int_max_str_digits(4321) .. rubric:: Footnotes diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 425b2cd19cc54a..c6eb74ebd82781 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -521,7 +521,7 @@ always available. :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) :const:`utf8_mode` :option:`-X utf8 <-X>` :const:`safe_path` :option:`-P` - :const:`int_max_base10_digits` :option:`-X int_max_base10_digits <-X>` (:ref:`int maximum digits limitation `) + :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`int maximum digits limitation `) ================================== ====================================================================================================== .. versionchanged:: 3.2 @@ -545,7 +545,7 @@ always available. Added the ``safe_path`` attribute for :option:`-P` option. .. versionchanged:: 3.11 - Added the ``int_max_base10_digits`` attribute. + Added the ``int_max_str_digits`` attribute. .. data:: float_info @@ -727,10 +727,10 @@ always available. .. versionadded:: 3.6 -.. function:: get_int_max_base10_digits() +.. 
function:: get_int_max_str_digits() Return current global value for :ref:`int maximum digits limitation - `. See also :func:`set_int_max_base10_digits` + `. See also :func:`set_int_max_str_digits` .. versionadded:: 3.11 @@ -1017,22 +1017,20 @@ always available. | :const:`sizeof_digit` | size in bytes of the C type used to | | | represent a digit | +----------------------------------------+-----------------------------------------------+ - | :const:`default_max_base10_digits` | default value for | - | | :func:`sys.get_int_max_base10_digits` when it | + | :const:`default_max_str_digits` | default value for | + | | :func:`sys.get_int_max_str_digits` when it | | | is not otherwise explicitly configured. | +----------------------------------------+-----------------------------------------------+ - | :const:`base10_digits_check_threshold` | minimum non-zero value for | - | | :func:`sys.set_int_max_base10_digits`, | - | | :envvar:`PYTHONINTMAXBASE10DIGITS`, or | - | | :option:`-X int_max_base10_digits <-X>`. | - | | Supplied positive values less than this will | - | | be silently rounded up to this value. | + | :const:`str_digits_check_threshold` | minimum non-zero value for | + | | :func:`sys.set_int_max_str_digits`, | + | | :envvar:`PYTHONINTMAXSTRDIGITS`, or | + | | :option:`-X int_max_str_digits <-X>`. | +----------------------------------------+-----------------------------------------------+ .. versionadded:: 3.1 .. versionchanged:: 3.11 - Added ``default_max_base10_digits`` and ``base10_digits_check_threshold``. + Added ``default_max_str_digits`` and ``str_digits_check_threshold``. .. data:: __interactivehook__ @@ -1333,10 +1331,10 @@ always available. .. availability:: Unix. -.. function:: set_int_max_base10_digits(n) +.. function:: set_int_max_str_digits(n) Set global interpreter limit for :ref:`int maximum digits limitation - `. See also :func:`get_int_max_base10_digits` + `. See also :func:`get_int_max_str_digits` .. 
versionadded:: 3.11 diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index ffa90052568bf6..d0a19b59f52229 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -505,8 +505,8 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. - * ``-X int_max_base10_digits`` configures :ref:`int maximum digits limitation - `. See also :envvar:`PYTHONINTMAXBASE10DIGITS`. + * ``-X int_max_str_digits`` configures :ref:`int maximum digits limitation + `. See also :envvar:`PYTHONINTMAXSTRDIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded @@ -585,7 +585,7 @@ Miscellaneous options The ``-X frozen_modules`` option. .. versionadded:: 3.11 - The ``-X int_max_base10_digits`` option. + The ``-X int_max_str_digits`` option. .. versionadded:: 3.12 The ``-X perf`` option. @@ -768,10 +768,10 @@ conflict. .. versionadded:: 3.2.3 -.. envvar:: PYTHONINTMAXBASE10DIGITS +.. envvar:: PYTHONINTMAXSTRDIGITS If this variable is set to an integer, it is used to configure the interpreter's - global :ref:`int maximum digits limitation `. + global :ref:`int maximum digits limitation `. .. versionadded:: 3.11 diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 60dc2d2766381e..5cb967650dbfc4 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -178,7 +178,7 @@ typedef struct PyConfig { wchar_t *check_hash_pycs_mode; int use_frozen_modules; int safe_path; - int int_max_base10_digits; // NOTE(gpshead): do not backport to stable releases due to struct change. + int int_max_str_digits; // NOTE(gpshead): do not backport to stable releases due to struct change. 
/* --- Path configuration inputs ------------ */ int pathconfig_warnings; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index bf3ad9ebe259b6..a5ddcf2d72f051 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -176,7 +176,7 @@ struct _is { struct types_state types; struct callable_cache callable_cache; - int int_max_base10_digits; + int int_max_str_digits; /* The following fields are here to avoid allocation during init. The data is exposed through PyInterpreterState pointer fields. diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index c4c2ac41ab9acb..e3fb3b2515f230 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -20,7 +20,7 @@ extern "C" { * * 2000 decimal digits fits a ~6643 bit number. */ -#define _PY_LONG_DEFAULT_MAX_BASE10_DIGITS 2000 +#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 2000 /* * Threshold for max digits check. For performance reasons int() and * int.__str__ don't checks values that are smaller than this @@ -32,11 +32,11 @@ extern "C" { * * 333 decimal digits fits a ~1106 bit number. */ -#define _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD 333 +#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 333 -#if ((_PY_LONG_DEFAULT_MAX_BASE10_DIGITS != 0) && \ - (_PY_LONG_DEFAULT_MAX_BASE10_DIGITS < _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)) -# error "_PY_LONG_DEFAULT_MAX_BASE10_DIGITS smaller than threshold." +#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ + (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) +# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold." 
#endif diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index b5952798e36cfb..6142e6d3094532 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2344,11 +2344,11 @@ def sleeping_retry(timeout, err_msg=None, /, @contextlib.contextmanager -def set_int_max_base10_digits(max_digits): - """Temporarily change the integer maximum base 10 digits limit.""" - current = sys.get_int_max_base10_digits() +def set_int_max_str_digits(max_digits): + """Temporarily change the int<->str maximum digits limit.""" + current = sys.get_int_max_str_digits() try: - sys.set_int_max_base10_digits(max_digits) + sys.set_int_max_str_digits(max_digits) yield finally: - sys.set_int_max_base10_digits(current) + sys.set_int_max_str_digits(current) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 5a4985a534a956..aebcbe4a217464 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -866,36 +866,36 @@ def test_parsing_error(self): self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) self.assertNotEqual(proc.returncode, 0) - def test_int_max_base10_digits(self): - code = "import sys; print(sys.flags.int_max_base10_digits, sys.get_int_max_base10_digits())" + def test_int_max_str_digits(self): + code = "import sys; print(sys.flags.int_max_str_digits, sys.get_int_max_str_digits())" - assert_python_failure('-X', 'int_max_base10_digits', '-c', code) - assert_python_failure('-X', 'int_max_base10_digits=foo', '-c', code) - assert_python_failure('-X', 'int_max_base10_digits=100', '-c', code) + assert_python_failure('-X', 'int_max_str_digits', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=foo', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=100', '-c', code) - assert_python_failure('-c', code, PYTHONINTMAXBASE10DIGITS='foo') - assert_python_failure('-c', code, PYTHONINTMAXBASE10DIGITS='100') + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo') + 
assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100') def res2int(res): out = res.out.strip().decode("utf-8") return tuple(int(i) for i in out.split()) res = assert_python_ok('-c', code) - self.assertEqual(res2int(res), (-1, sys.get_int_max_base10_digits())) - res = assert_python_ok('-X', 'int_max_base10_digits=0', '-c', code) + self.assertEqual(res2int(res), (-1, sys.get_int_max_str_digits())) + res = assert_python_ok('-X', 'int_max_str_digits=0', '-c', code) self.assertEqual(res2int(res), (0, 0)) - res = assert_python_ok('-X', 'int_max_base10_digits=4000', '-c', code) + res = assert_python_ok('-X', 'int_max_str_digits=4000', '-c', code) self.assertEqual(res2int(res), (4000, 4000)) - res = assert_python_ok('-X', 'int_max_base10_digits=100000', '-c', code) + res = assert_python_ok('-X', 'int_max_str_digits=100000', '-c', code) self.assertEqual(res2int(res), (100000, 100000)) - res = assert_python_ok('-c', code, PYTHONINTMAXBASE10DIGITS='0') + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0') self.assertEqual(res2int(res), (0, 0)) - res = assert_python_ok('-c', code, PYTHONINTMAXBASE10DIGITS='4000') + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000') self.assertEqual(res2int(res), (4000, 4000)) res = assert_python_ok( - '-X', 'int_max_base10_digits=6000', '-c', code, - PYTHONINTMAXBASE10DIGITS='4000' + '-X', 'int_max_str_digits=6000', '-c', code, + PYTHONINTMAXSTRDIGITS='4000' ) self.assertEqual(res2int(res), (6000, 6000)) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index e8ee0e1d7a8afd..67ccaab40c5edc 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -2528,11 +2528,11 @@ class PyUsabilityTest(UsabilityTest): def setUp(self): super().setUp() - self._previous_int_limit = sys.get_int_max_base10_digits() - sys.set_int_max_base10_digits(7000) + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) def tearDown(self): - 
sys.set_int_max_base10_digits(self._previous_int_limit) + sys.set_int_max_str_digits(self._previous_int_limit) super().tearDown() class PythonAPItests(unittest.TestCase): @@ -4637,11 +4637,11 @@ class PyCoverage(Coverage): def setUp(self): super().setUp() - self._previous_int_limit = sys.get_int_max_base10_digits() - sys.set_int_max_base10_digits(7000) + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) def tearDown(self): - sys.set_int_max_base10_digits(self._previous_int_limit) + sys.set_int_max_str_digits(self._previous_int_limit) super().tearDown() class PyFunctionality(unittest.TestCase): diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index e36057ffaa38c7..18ce517c474f44 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -434,7 +434,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'install_signal_handlers': 1, 'use_hash_seed': 0, 'hash_seed': 0, - 'int_max_base10_digits': -1, + 'int_max_str_digits': -1, 'faulthandler': 0, 'tracemalloc': 0, 'perf_profiling': 0, diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index ccc782a22e37d1..bb42fb847df21f 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -578,32 +578,32 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) -class IntBase10DigitLimitsTests(unittest.TestCase): +class IntStrDigitLimitsTests(unittest.TestCase): int_class = int # Override this in subclasses to reuse the suite. 
def setUp(self): super().setUp() - self._previous_limit = sys.get_int_max_base10_digits() - sys.set_int_max_base10_digits(2048) + self._previous_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(2048) def tearDown(self): - sys.set_int_max_base10_digits(self._previous_limit) + sys.set_int_max_str_digits(self._previous_limit) super().tearDown() def test_disabled_limit(self): - self.assertGreater(sys.get_int_max_base10_digits(), 0) - self.assertLess(sys.get_int_max_base10_digits(), 20_000) - with support.set_int_max_base10_digits(0): - self.assertEqual(sys.get_int_max_base10_digits(), 0) + self.assertGreater(sys.get_int_max_str_digits(), 0) + self.assertLess(sys.get_int_max_str_digits(), 20_000) + with support.set_int_max_str_digits(0): + self.assertEqual(sys.get_int_max_str_digits(), 0) i = self.int_class('1' * 20_000) str(i) - self.assertGreater(sys.get_int_max_base10_digits(), 0) + self.assertGreater(sys.get_int_max_str_digits(), 0) - def test_max_base10_digits_edge_cases(self): + def test_max_str_digits_edge_cases(self): """Ignore the +/- sign and space padding.""" int_class = self.int_class - maxdigits = sys.get_int_max_base10_digits() + maxdigits = sys.get_int_max_str_digits() int_class('1' * maxdigits) int_class(' ' + '1' * maxdigits) @@ -619,8 +619,8 @@ def check(self, i, base=None): else: self.int_class(i, base) - def test_max_base10_digits(self): - maxdigits = sys.get_int_max_base10_digits() + def test_max_str_digits(self): + maxdigits = sys.get_int_max_str_digits() self.check('1' * (maxdigits + 1)) self.check(' ' + '1' * (maxdigits + 1)) @@ -635,7 +635,7 @@ def test_max_base10_digits(self): def test_power_of_two_bases_unlimited(self): """The limit does not apply to power of 2 bases.""" - maxdigits = sys.get_int_max_base10_digits() + maxdigits = sys.get_int_max_str_digits() for base in (2, 4, 8, 16, 32): with self.subTest(base=base): @@ -644,39 +644,51 @@ def test_power_of_two_bases_unlimited(self): self.int_class('1' * 100_000, base) def 
test_underscores_ignored(self): - """The limit ignores underscore separators.""" - maxdigits = sys.get_int_max_base10_digits() + maxdigits = sys.get_int_max_str_digits() - s = '1111_' * ((maxdigits) // 4) - s = s[:-1] - self.int_class(s) - self.check(s + '1') + triples = maxdigits // 3 + s = '111' * triples + s_ = '1_11' * triples + self.int_class(s) # succeeds + self.int_class(s_) # succeeds + self.check(f'{s}111') + self.check(f'{s_}_111') - def _other_base_helper(self, base): + def test_sign_not_counted(self): int_class = self.int_class - max_digits = sys.get_int_max_base10_digits() + max_digits = sys.get_int_max_str_digits() + s = '5' * max_digits + i = int_class(s) + pos_i = int_class(f'+{s}') + assert i == pos_i + neg_i = int_class(f'-{s}') + assert -pos_i == neg_i + str(pos_i) + str(neg_i) - base_digits = int(max_digits*log(10)/log(base)) + def _other_base_helper(self, base): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '2' * max_digits + i = int_class(s, base) if base > 10: - assert base_digits < max_digits + with self.assertRaises(ValueError): + str(i) elif base < 10: - assert base_digits > max_digits - s = '3' * base_digits - self.assertLessEqual(len(str(int_class(s, base))), max_digits) - int_class(f'{s}1', base) + str(i) with self.assertRaises(ValueError) as err: - int_class(f'{s}11', base) + int_class(f'{s}1', base) def test_int_from_other_bases(self): - """The limit should scale as an equivalent number of decimal digits.""" - with self.subTest(base=9): - self._other_base_helper(9) - - with self.subTest(base=36): - self._other_base_helper(36) + base = 3 + with self.subTest(base=base): + self._other_base_helper(base) + base = 36 + with self.subTest(base=base): + self._other_base_helper(base) -class IntSubclassBase10DigitLimitsTests(IntBase10DigitLimitsTests): +class IntSubclassStrDigitLimitsTests(IntStrDigitLimitsTests): int_class = IntSubclass diff --git a/Lib/test/test_json/test_decode.py 
b/Lib/test/test_json/test_decode.py index aa8d52f99be33f..6ae285fe5a8822 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -99,7 +99,7 @@ def test_negative_index(self): def test_limit_int(self): maxdigits = 5000 - with support.set_int_max_base10_digits(maxdigits): + with support.set_int_max_str_digits(maxdigits): self.loads('1' * maxdigits) with self.assertRaises(ValueError): self.loads('1' * (maxdigits + 1)) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 2432f7cac567a8..41482734872e06 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -553,14 +553,14 @@ def test_attributes(self): self.assertEqual(len(sys.int_info), 4) self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) self.assertTrue(sys.int_info.sizeof_digit >= 1) - self.assertGreaterEqual(sys.int_info.default_max_base10_digits, 500) - self.assertGreaterEqual(sys.int_info.base10_digits_check_threshold, 100) - self.assertGreater(sys.int_info.default_max_base10_digits, - sys.int_info.base10_digits_check_threshold) + self.assertGreaterEqual(sys.int_info.default_max_str_digits, 500) + self.assertGreaterEqual(sys.int_info.str_digits_check_threshold, 100) + self.assertGreater(sys.int_info.default_max_str_digits, + sys.int_info.str_digits_check_threshold) self.assertEqual(type(sys.int_info.bits_per_digit), int) self.assertEqual(type(sys.int_info.sizeof_digit), int) - self.assertIsInstance(sys.int_info.default_max_base10_digits, int) - self.assertIsInstance(sys.int_info.base10_digits_check_threshold, int) + self.assertIsInstance(sys.int_info.default_max_str_digits, int) + self.assertIsInstance(sys.int_info.str_digits_check_threshold, int) self.assertIsInstance(sys.hexversion, int) self.assertEqual(len(sys.hash_info), 9) @@ -683,7 +683,7 @@ def test_sys_flags(self): "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", "hash_randomization", "isolated", "dev_mode", "utf8_mode", - 
"warn_default_encoding", "safe_path", "int_max_base10_digits") + "warn_default_encoding", "safe_path", "int_max_str_digits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr in ("dev_mode", "safe_path") else int diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 0776f2265f9dd3..646ced0f003dde 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -295,7 +295,7 @@ def test_limit_int(self): check('123456780123456789', None) with self.assertRaises(ValueError): maxdigits = 5000 - with support.set_int_max_base10_digits(maxdigits): + with support.set_int_max_str_digits(maxdigits): s = '1' * (maxdigits + 1) check(f'{s}', None) diff --git a/Objects/longobject.c b/Objects/longobject.c index e26c48b17d0c4f..d977aa9c145748 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -37,7 +37,7 @@ medium_value(PyLongObject *x) #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS) -#define _MAX_BASE10_DIGITS_ERROR_FMT "Exceeds digit limit for string conversions: Value uses approximately %zd base 10 digits." +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds digit limit for string conversions: value has %zd digits." 
static inline void _Py_DECREF_INT(PyLongObject *op) @@ -1818,14 +1818,14 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } - if (strlen > _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) { + if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { PyInterpreterState *interp = _PyInterpreterState_GET(); - int max_base10_digits = interp->int_max_base10_digits; - Py_ssize_t strlen_nosign = strlen - negative; - if ((max_base10_digits > 0) && (strlen_nosign > max_base10_digits)) { + int max_str_digits = interp->int_max_str_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); - PyErr_Format(PyExc_ValueError, _MAX_BASE10_DIGITS_ERROR_FMT, - strlen_nosign); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + strlen_nosign); return -1; } } @@ -2374,7 +2374,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { - /* binary bases are not limited by int_max_base10_digits */ + /* binary bases are not limited by int_max_str_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2470,7 +2470,6 @@ digit beyond the first. twodigits c; /* current input character */ Py_ssize_t size_z; Py_ssize_t digits = 0; // Number of base $base digits in str. - Py_ssize_t underscores = 0; int i; int convwidth; twodigits convmultmax, convmult; @@ -2493,7 +2492,6 @@ digit beyond the first. while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base || *scan == '_') { if (*scan == '_') { - ++underscores; if (prev == '_') { /* Only one underscore allowed. */ str = lastdigit + 1; @@ -2514,48 +2512,14 @@ digit beyond the first. goto onError; } - /* The int_max_base10_digits limit ignores underscores. - * - * We compute the worst case number of base 10 digits to - * represent a $digits length number of base $base. The number - * isn't precise. 
It could be off by 1 and will be imprecise - * for a million digits but is close enough our needs. - */ - { - Py_ssize_t num_decimal_digits; - if (base == 10) { - num_decimal_digits = digits; - } - else // base != 10 && != 2,4,8,16,32 either; those were handled. - { - // float to save space, limiting doesn't require high precision. - // Limits beyond 1,000,000 may be imprecise as a result. - static float decimal_digits_per_base_digits_ratio[37] = {0.0e0,}; - if (decimal_digits_per_base_digits_ratio[base] == 0.0) { - if (log_base_BASE[10] == 0.0) { - fill_in_log_conversion_table( - 10, log_base_BASE, convwidth_base, convmultmax_base); - } - double log_10 = log_base_BASE[10]; - // The log table was already filled in for this earlier. - assert(log_base_BASE[base] != 0.0); - double log_base = log_base_BASE[base]; - decimal_digits_per_base_digits_ratio[base] = ( - log_base / log_10); - } - // digits * log_base / log_10 - num_decimal_digits = (Py_ssize_t)floorf( - (float)digits * decimal_digits_per_base_digits_ratio[base]); - } - if (num_decimal_digits > _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - int max_base10_digits = interp->int_max_base10_digits; - if ((max_base10_digits > 0) && - (num_decimal_digits > max_base10_digits)) { - PyErr_Format(PyExc_ValueError, _MAX_BASE10_DIGITS_ERROR_FMT, - num_decimal_digits); - return NULL; - } + /* Limit the size to avoid excessive computation attacks. */ + if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && (digits > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + digits); + return NULL; } } @@ -6171,8 +6135,8 @@ internal representation of integers. 
The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, - {"default_max_base10_digits", "maximum base 10 digits limitation"}, - {"base10_digits_check_threshold", "minimum positive value for sys.set_int_max_base10_digits()"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, {NULL, NULL} }; @@ -6203,9 +6167,9 @@ PyLong_GetInfo(void) * as info for debugging. No concern about adding these in a backport. */ PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(_PY_LONG_DEFAULT_MAX_BASE10_DIGITS)); + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(_PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)); + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -6233,9 +6197,9 @@ _PyLong_InitTypes(PyInterpreterState *interp) return _PyStatus_ERR("can't init int info type"); } } - interp->int_max_base10_digits = _PyInterpreterState_GetConfig(interp)->int_max_base10_digits; - if (interp->int_max_base10_digits == -1) { - interp->int_max_base10_digits = _PY_LONG_DEFAULT_MAX_BASE10_DIGITS; + interp->int_max_str_digits = _PyInterpreterState_GetConfig(interp)->int_max_str_digits; + if (interp->int_max_str_digits == -1) { + interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; } return _PyStatus_OK(); diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index e94bbfb06fffe7..6d6d93aa5f10a1 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -745,42 +745,42 @@ sys_mdebug(PyObject *module, PyObject *arg) #endif /* defined(USE_MALLOPT) */ -PyDoc_STRVAR(sys_get_int_max_base10_digits__doc__, -"get_int_max_base10_digits($module, /)\n" 
+PyDoc_STRVAR(sys_get_int_max_str_digits__doc__, +"get_int_max_str_digits($module, /)\n" "--\n" "\n" -"Set the integer maximum decimal digits limit used on int<->str conversions."); +"Set the maximum string digits limit for non-binary int<->str conversions."); -#define SYS_GET_INT_MAX_BASE10_DIGITS_METHODDEF \ - {"get_int_max_base10_digits", (PyCFunction)sys_get_int_max_base10_digits, METH_NOARGS, sys_get_int_max_base10_digits__doc__}, +#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \ + {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__}, static PyObject * -sys_get_int_max_base10_digits_impl(PyObject *module); +sys_get_int_max_str_digits_impl(PyObject *module); static PyObject * -sys_get_int_max_base10_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) +sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return sys_get_int_max_base10_digits_impl(module); + return sys_get_int_max_str_digits_impl(module); } -PyDoc_STRVAR(sys_set_int_max_base10_digits__doc__, -"set_int_max_base10_digits($module, /, maxdigits)\n" +PyDoc_STRVAR(sys_set_int_max_str_digits__doc__, +"set_int_max_str_digits($module, /, maxdigits)\n" "--\n" "\n" -"Set the integer maximum decimal digits limit used on int<->str conversions."); +"Set the maximum string digits limit for non-binary int<->str conversions."); -#define SYS_SET_INT_MAX_BASE10_DIGITS_METHODDEF \ - {"set_int_max_base10_digits", _PyCFunction_CAST(sys_set_int_max_base10_digits), METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_base10_digits__doc__}, +#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \ + {"set_int_max_str_digits", _PyCFunction_CAST(sys_set_int_max_str_digits), METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__}, static PyObject * -sys_set_int_max_base10_digits_impl(PyObject *module, int maxdigits); +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits); static PyObject * -sys_set_int_max_base10_digits(PyObject 
*module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; static const char * const _keywords[] = {"maxdigits", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_base10_digits", 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_str_digits", 0}; PyObject *argsbuf[1]; int maxdigits; @@ -792,7 +792,7 @@ sys_set_int_max_base10_digits(PyObject *module, PyObject *const *args, Py_ssize_ if (maxdigits == -1 && PyErr_Occurred()) { goto exit; } - return_value = sys_set_int_max_base10_digits_impl(module, maxdigits); + return_value = sys_set_int_max_str_digits_impl(module, maxdigits); exit: return return_value; @@ -1320,4 +1320,4 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=5539c6096dc5eab5 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=27e4e7e2264ea035 input=a9049054013a1b77]*/ diff --git a/Python/initconfig.c b/Python/initconfig.c index cf27f3aa37909f..fb5abb35100e12 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3,7 +3,7 @@ #include "pycore_getopt.h" // _PyOS_GetOpt() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _PyInterpreterState.runtime -#include "pycore_long.h" // _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_pathconfig.h" // _Py_path_config #include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pylifecycle.h" // _Py_PreInitializeFromConfig() @@ -127,9 +127,9 @@ The following implementation-specific options are available:\n\ -X frozen_modules=[on|off]: whether or not frozen modules should be used.\n\ The default is \"on\" (or 
\"off\" if you are running a local build).\n\ \n\ --X int_max_base10_digits=number: limit the size of int<->str conversions.\n\ +-X int_max_str_digits=number: limit the size of int<->str conversions.\n\ This helps avoid denial of service attacks when parsing untrusted data.\n\ - The default is sys.int_info.default_max_base10_digits. 0 disables."; + The default is sys.int_info.default_max_str_digits. 0 disables."; /* Envvars that don't have equivalent command-line options are listed first */ static const char usage_envvars[] = @@ -149,7 +149,7 @@ static const char usage_envvars[] = " to seed the hashes of str and bytes objects. It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" -"PYTHONINTMAXBASE10DIGITS: limits the maximum decimal digits in an int value\n" +"PYTHONINTMAXSTRDIGITS: limits the maximum decimal digits in an int value\n" " when converting from a string and when converting an int back to a str.\n" " A value of 0 disables the limit. 
Conversions from power of two number\n" " bases are never limited.\n" @@ -791,8 +791,8 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->safe_path = 0; config->_is_python_build = 0; config->code_debug_ranges = 1; - /* config_init_int_max_base10_digits() sets default limit */ - config->int_max_base10_digits = -1; + /* config_init_int_max_str_digits() sets default limit */ + config->int_max_str_digits = -1; } @@ -1019,7 +1019,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(safe_path); COPY_WSTRLIST(orig_argv); COPY_ATTR(_is_python_build); - COPY_ATTR(int_max_base10_digits); + COPY_ATTR(int_max_str_digits); #undef COPY_ATTR #undef COPY_WSTR_ATTR @@ -1127,7 +1127,7 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_INT(use_frozen_modules); SET_ITEM_INT(safe_path); SET_ITEM_INT(_is_python_build); - SET_ITEM_INT(int_max_base10_digits); + SET_ITEM_INT(int_max_str_digits); return dict; @@ -1776,47 +1776,47 @@ config_init_tracemalloc(PyConfig *config) } static PyStatus -config_init_int_max_base10_digits(PyConfig *config) +config_init_int_max_str_digits(PyConfig *config) { int maxdigits; int valid = 0; /* default to unconfigured, _PyLong_InitTypes() does the rest */ - config->int_max_base10_digits = -1; + config->int_max_str_digits = -1; - const char *env = config_get_env(config, "PYTHONINTMAXBASE10DIGITS"); + const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS"); if (env) { if (!_Py_str_to_int(env, &maxdigits)) { - valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)); + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); } if (!valid) { #define STRINGIFY(VAL) _STRINGIFY(VAL) #define _STRINGIFY(VAL) #VAL return _PyStatus_ERR( - "PYTHONINTMAXBASE10DIGITS: invalid limit; must be >= " - STRINGIFY(_PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) + "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) " or 0 for unlimited."); } - 
config->int_max_base10_digits = maxdigits; + config->int_max_str_digits = maxdigits; } - const wchar_t *xoption = config_get_xoption(config, L"int_max_base10_digits"); + const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); if (xoption) { const wchar_t *sep = wcschr(xoption, L'='); if (sep) { if (!config_wstr_to_int(sep + 1, &maxdigits)) { - valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)); + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); } } if (!valid) { return _PyStatus_ERR( - "-X int_max_base10_digits: invalid limit; must be >= " - STRINGIFY(_PY_LONG_MAX_BASE10_DIGITS_THRESHOLD) + "-X int_max_str_digits: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) " or 0 for unlimited."); #undef _STRINGIFY #undef STRINGIFY } - config->int_max_base10_digits = maxdigits; + config->int_max_str_digits = maxdigits; } return _PyStatus_OK(); } @@ -1884,8 +1884,8 @@ config_read_complex_options(PyConfig *config) } } - if (config->int_max_base10_digits < 0) { - status = config_init_int_max_base10_digits(config); + if (config->int_max_str_digits < 0) { + status = config_init_int_max_str_digits(config); if (_PyStatus_EXCEPTION(status)) { return status; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 3fb20b39f55332..43109445e75d10 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -20,7 +20,7 @@ Data members: #include "pycore_code.h" // _Py_QuickenedCount #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() -#include "pycore_long.h" // _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_namespace.h" // _PyNamespace_New() #include "pycore_object.h" // _PyObject_IS_GC() #include "pycore_pathconfig.h" // _PyPathConfig_ComputeSysPath0() @@ -1674,39 +1674,39 @@ sys_mdebug_impl(PyObject *module, int flag) /*[clinic input] 
-sys.get_int_max_base10_digits +sys.get_int_max_str_digits -Set the integer maximum decimal digits limit used on int<->str conversions. +Set the maximum string digits limit for non-binary int<->str conversions. [clinic start generated code]*/ static PyObject * -sys_get_int_max_base10_digits_impl(PyObject *module) -/*[clinic end generated code: output=1b56ca97b75c4c7d input=af480955a120eb99]*/ +sys_get_int_max_str_digits_impl(PyObject *module) +/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - return PyLong_FromSsize_t(interp->int_max_base10_digits); + return PyLong_FromSsize_t(interp->int_max_str_digits); } /*[clinic input] -sys.set_int_max_base10_digits +sys.set_int_max_str_digits maxdigits: int -Set the integer maximum decimal digits limit used on int<->str conversions. +Set the maximum string digits limit for non-binary int<->str conversions. [clinic start generated code]*/ static PyObject * -sys_set_int_max_base10_digits_impl(PyObject *module, int maxdigits) -/*[clinic end generated code: output=d0cc502962bdb9b5 input=52cf6736588172db]*/ +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits) +/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/ { PyThreadState *tstate = _PyThreadState_GET(); - if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD)) { - tstate->interp->int_max_base10_digits = maxdigits; + if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { + tstate->interp->int_max_str_digits = maxdigits; Py_RETURN_NONE; } else { PyErr_Format( PyExc_ValueError, "maxdigits must be 0 or larger than %d", - _PY_LONG_MAX_BASE10_DIGITS_THRESHOLD); + _PY_LONG_MAX_STR_DIGITS_THRESHOLD); return NULL; } } @@ -2226,8 +2226,8 @@ static PyMethodDef sys_methods[] = { SYS_DEACTIVATE_STACK_TRAMPOLINE_METHODDEF SYS_IS_STACK_TRAMPOLINE_ACTIVE_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF - 
SYS_GET_INT_MAX_BASE10_DIGITS_METHODDEF - SYS_SET_INT_MAX_BASE10_DIGITS_METHODDEF + SYS_GET_INT_MAX_STR_DIGITS_METHODDEF + SYS_SET_INT_MAX_STR_DIGITS_METHODDEF #ifdef Py_STATS SYS__STATS_ON_METHODDEF SYS__STATS_OFF_METHODDEF @@ -2728,7 +2728,7 @@ static PyStructSequence_Field flags_fields[] = { {"utf8_mode", "-X utf8"}, {"warn_default_encoding", "-X warn_default_encoding"}, {"safe_path", "-P"}, - {"int_max_base10_digits", "-X int_max_base10_digits"}, + {"int_max_str_digits", "-X int_max_str_digits"}, {0} }; @@ -2777,7 +2777,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags) SetFlag(preconfig->utf8_mode); SetFlag(config->warn_default_encoding); SetFlagObj(PyBool_FromLong(config->safe_path)); - SetFlag(config->int_max_base10_digits); + SetFlag(config->int_max_str_digits); #undef SetFlagObj #undef SetFlag return 0; From ac9f22f2cc4d7a223d9574dc38afb3f36235a97d Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Sun, 7 Aug 2022 22:26:11 +0000 Subject: [PATCH 14/45] Remove the added now-unneeded helper log tbl fn. --- Objects/longobject.c | 50 ++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index d977aa9c145748..9b744f87fadb2b 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2273,38 +2273,6 @@ long_from_binary_base(const char **str, int base, PyLongObject **res) return 0; } -/* - * A helper function to precompute valies in a small static log table used - * in PyLong_String. The caller should check if the table is already filled - * in (non-zero value) at the [base] index before calling. - * - * Appropriate values in log_base_table, convwidth_base, and convmultmax_base - * are computed and filled in at index [base]. 
- */ -static void fill_in_log_conversion_table( - int base, - double *log_base_table, - int *convwidth_base, - twodigits *convmultmax_base) -{ - twodigits convmax = base; - int i = 1; - - log_base_table[base] = (log((double)base) / - log((double)PyLong_BASE)); - for (;;) { - twodigits next = convmax * base; - if (next > PyLong_BASE) { - break; - } - convmax = next; - ++i; - } - convmultmax_base[base] = convmax; - assert(i > 0); - convwidth_base[base] = i; -} - /* Parses an int from a bytestring. Leading and trailing whitespace will be * ignored. * @@ -2482,8 +2450,22 @@ digit beyond the first. static twodigits convmultmax_base[37] = {0,}; if (log_base_BASE[base] == 0.0) { - fill_in_log_conversion_table( - base, log_base_BASE, convwidth_base, convmultmax_base); + twodigits convmax = base; + int i = 1; + + log_base_BASE[base] = (log((double)base) / + log((double)PyLong_BASE)); + for (;;) { + twodigits next = convmax * base; + if (next > PyLong_BASE) { + break; + } + convmax = next; + ++i; + } + convmultmax_base[base] = convmax; + assert(i > 0); + convwidth_base[base] = i; } /* Find length of the string of numeric characters. */ From da72dd146241874d9cda6eae2c311ab4d6c4e899 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Sun, 7 Aug 2022 22:42:30 +0000 Subject: [PATCH 15/45] prevent intdostimeit from emitting errors in test_tools. --- Tools/scripts/intdostimeit.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Tools/scripts/intdostimeit.py b/Tools/scripts/intdostimeit.py index e8d33e6f1400fd..9e9f1cfef780ea 100644 --- a/Tools/scripts/intdostimeit.py +++ b/Tools/scripts/intdostimeit.py @@ -24,8 +24,11 @@ 1000000 digits 1 loop, best of 5: 5.2 sec per loop """ +import sys import timeit +unlimiter = getattr(sys, "set_int_max_str_digits", lambda _: None) +unlimiter(0) # Disable the limit when running on a limited interpreter. for i in [ 100, From d7e4d7bb4ff88c6a3de98a6b89dd8cff30fc4436 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google]" Date: Sun, 7 Aug 2022 22:49:23 +0000 Subject: [PATCH 16/45] Remove a leftover base 10 reference. clarify. --- Python/initconfig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/initconfig.c b/Python/initconfig.c index fb5abb35100e12..3a759ff6f8e99d 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -149,10 +149,10 @@ static const char usage_envvars[] = " to seed the hashes of str and bytes objects. It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" -"PYTHONINTMAXSTRDIGITS: limits the maximum decimal digits in an int value\n" +"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n" " when converting from a string and when converting an int back to a str.\n" -" A value of 0 disables the limit. Conversions from power of two number\n" -" bases are never limited.\n" +" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n" +" 16, and 32 are never limited.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" From 5c7e6d546c56974e75284eeaada9348178acf2a5 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Sun, 7 Aug 2022 23:35:47 +0000 Subject: [PATCH 17/45] versionadded/changed to 3.12 Playing it safe, if this lands in 3.11 before 3.11.0 these can be updated to 3.11. --- Doc/c-api/init_config.rst | 2 ++ Doc/library/functions.rst | 2 +- Doc/library/json.rst | 2 +- Doc/library/stdtypes.rst | 2 ++ Doc/library/sys.rst | 8 ++++---- Doc/using/cmdline.rst | 2 +- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 5a1433b623c392..00d63f92df220b 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -838,6 +838,8 @@ PyConfig Default: ``-1``. + .. versionadded:: 3.12 + .. 
c:member:: int isolated If greater than ``0``, enable isolated mode: diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index eef0e2723cafde..f926e4360264b8 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -910,7 +910,7 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.11 The delegation to :meth:`__trunc__` is deprecated. - .. versionchanged:: 3.11 + .. versionchanged:: 3.12 :class:`int` string inputs and string representations can be limited to help avoid denial of service attacks. A :exc:`ValueError` is raised when the limit is exceeded while converting a string *x* to an :class:`int` or diff --git a/Doc/library/json.rst b/Doc/library/json.rst index e094de18745ef7..02a422b433e5e7 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -258,7 +258,7 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - .. versionchanged:: 3.11 + .. versionchanged:: 3.12 The default implementation of *parse_int* limits the maximum length of the integer string via the interpreter's :ref:`int maximum digits limitation ` mechanism to help avoid denial of diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 98959d7fb0ec0b..81e74695ea61e3 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5490,6 +5490,8 @@ When an operation exceeds the limit, a :exc:`ValueError` is raised:: ... ValueError: Exceeds digit limit for string conversions: value has 2049 digits. +.. versionadded:: 3.12 + Configuring the limit --------------------- diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index c6eb74ebd82781..1be19794ed1946 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -544,7 +544,7 @@ always available. .. versionchanged:: 3.11 Added the ``safe_path`` attribute for :option:`-P` option. - .. versionchanged:: 3.11 + .. versionchanged:: 3.12 Added the ``int_max_str_digits`` attribute. 
@@ -732,7 +732,7 @@ always available. Return current global value for :ref:`int maximum digits limitation `. See also :func:`set_int_max_str_digits` - .. versionadded:: 3.11 + .. versionadded:: 3.12 .. function:: getrefcount(object) @@ -1029,7 +1029,7 @@ always available. .. versionadded:: 3.1 - .. versionchanged:: 3.11 + .. versionchanged:: 3.12 Added ``default_max_str_digits`` and ``str_digits_check_threshold``. @@ -1336,7 +1336,7 @@ always available. Set global interpreter limit for :ref:`int maximum digits limitation `. See also :func:`get_int_max_str_digits` - .. versionadded:: 3.11 + .. versionadded:: 3.12 .. function:: setprofile(profilefunc) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index d0a19b59f52229..40048f076f0642 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -584,7 +584,7 @@ Miscellaneous options .. versionadded:: 3.11 The ``-X frozen_modules`` option. - .. versionadded:: 3.11 + .. versionadded:: 3.12 The ``-X int_max_str_digits`` option. .. versionadded:: 3.12 From 61a5bc9af5d12f77244e00253343699dbb5a0b84 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Sun, 7 Aug 2022 23:44:10 +0000 Subject: [PATCH 18/45] Link to the CVE from the main doc. --- Doc/library/stdtypes.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 81e74695ea61e3..74285b262c91c4 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5490,6 +5490,9 @@ When an operation exceeds the limit, a :exc:`ValueError` is raised:: ... ValueError: Exceeds digit limit for string conversions: value has 2049 digits. +This limit offers a practical way to avoid `CVE-2020-10735 +https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + .. versionadded:: 3.12 Configuring the limit From c15addec808b930dfb29cafa5de030f38f287a5f Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google]" Date: Sun, 7 Aug 2022 23:53:04 +0000 Subject: [PATCH 19/45] Add a What's New entry. --- Doc/whatsnew/3.12.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index f9fa8ac3123198..03193d0b75cc0a 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -83,6 +83,14 @@ Other Language Changes mapping is hashable. (Contributed by Serhiy Storchaka in :gh:`87995`.) +* Converting between :class:`int` and :class:`str` in non binary multiple bases + such as human friendly base 10 now limits the maximum number of string digits + by default to avoid potential denial of service attacks. This is a mitigation + for `CVE-2020-10735 + <https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. The limit + can be configured by environment variable, command line flag, or :mod:`sys` + APIs. See the :ref:`int maximum digits limitation <int_max_str_digits>` docs. + New Modules =========== From 76ae1c2ab2db31b1497bb0a5efefe3b0e24abdff Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Sun, 7 Aug 2022 23:57:50 +0000 Subject: [PATCH 20/45] Add a Misc/NEWS.d entry. --- .../2022-08-07-16-53.CVE-2020-10735.ch010gps.rst | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst b/Misc/NEWS.d/next/Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst new file mode 100644 index 00000000000000..9194c5c1ff35a4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst @@ -0,0 +1,9 @@ +Converting between :class:`int` and :class:`str` in non binary multiple bases +(2, 4, 8, 16, & 32) such as base 10 now limits the maximum number of string +digits by default to avoid potential denial of service attacks. This is a +mitigation for `CVE-2020-10735 +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_.
+ +This new limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`int maximum digits limitation +` docs. From 1ad88f551dd2dbcbcb1d8f57a4f05dfddc39d587 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Mon, 8 Aug 2022 00:37:50 +0000 Subject: [PATCH 21/45] Undo addition to PyConfig to ease backporting. Release branches MUST NOT have `struct PyConfig` change. Doing it this way initially for 3.12 simplifies the backporting process. This commit is intended to be reverted in 3.12 after this lands in older releases. --- Doc/c-api/init_config.rst | 12 ------------ Include/cpython/initconfig.h | 1 - Include/internal/pycore_initconfig.h | 2 ++ Lib/test/test_embed.py | 1 - Objects/longobject.c | 2 +- Python/initconfig.c | 17 +++++++---------- Python/sysmodule.c | 2 +- 7 files changed, 11 insertions(+), 26 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 00d63f92df220b..c4a342ee811ca9 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -828,18 +828,6 @@ PyConfig Default: ``0``. - .. c:member:: int int_max_str_digits - - If greater than 0, enable int conversion digit limitations. ``-1`` means - that :data:`sys.int_info.default_max_str_digits` will be used. - - Configured by the :option:`-X int_max_str_digits <-X>` command line - flag or the :envvar:`PYTHONINTMAXSTRDIGITS` environment varable. - - Default: ``-1``. - - .. versionadded:: 3.12 - .. c:member:: int isolated If greater than ``0``, enable isolated mode: diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 5cb967650dbfc4..c6057a4c3ed945 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -178,7 +178,6 @@ typedef struct PyConfig { wchar_t *check_hash_pycs_mode; int use_frozen_modules; int safe_path; - int int_max_str_digits; // NOTE(gpshead): do not backport to stable releases due to struct change. 
/* --- Path configuration inputs ------------ */ int pathconfig_warnings; diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 69f88d7d1d46b8..6e491261d55ca6 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -170,6 +170,8 @@ extern void _Py_DumpPathConfig(PyThreadState *tstate); PyAPI_FUNC(PyObject*) _Py_Get_Getpath_CodeObject(void); +extern int _Py_global_config_int_max_str_digits; // TODO(gpshead): move this into PyConfig in 3.12 after the backports ship. + /* --- Function used for testing ---------------------------------- */ diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 18ce517c474f44..70d7367ea9e64f 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -434,7 +434,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'install_signal_handlers': 1, 'use_hash_seed': 0, 'hash_seed': 0, - 'int_max_str_digits': -1, 'faulthandler': 0, 'tracemalloc': 0, 'perf_profiling': 0, diff --git a/Objects/longobject.c b/Objects/longobject.c index 9b744f87fadb2b..2c623acbed5ecc 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6179,7 +6179,7 @@ _PyLong_InitTypes(PyInterpreterState *interp) return _PyStatus_ERR("can't init int info type"); } } - interp->int_max_str_digits = _PyInterpreterState_GetConfig(interp)->int_max_str_digits; + interp->int_max_str_digits = _Py_global_config_int_max_str_digits; if (interp->int_max_str_digits == -1) { interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; } diff --git a/Python/initconfig.c b/Python/initconfig.c index 3a759ff6f8e99d..f18ec4068bc443 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -791,10 +791,12 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->safe_path = 0; config->_is_python_build = 0; config->code_debug_ranges = 1; - /* config_init_int_max_str_digits() sets default limit */ - config->int_max_str_digits = -1; } +/* Excluded from public struct 
PyConfig for backporting reasons. */ +/* default to unconfigured, _PyLong_InitTypes() does the rest */ +int _Py_global_config_int_max_str_digits = -1; + static void config_init_defaults(PyConfig *config) @@ -1019,7 +1021,6 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(safe_path); COPY_WSTRLIST(orig_argv); COPY_ATTR(_is_python_build); - COPY_ATTR(int_max_str_digits); #undef COPY_ATTR #undef COPY_WSTR_ATTR @@ -1127,7 +1128,6 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_INT(use_frozen_modules); SET_ITEM_INT(safe_path); SET_ITEM_INT(_is_python_build); - SET_ITEM_INT(int_max_str_digits); return dict; @@ -1781,9 +1781,6 @@ config_init_int_max_str_digits(PyConfig *config) int maxdigits; int valid = 0; - /* default to unconfigured, _PyLong_InitTypes() does the rest */ - config->int_max_str_digits = -1; - const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS"); if (env) { if (!_Py_str_to_int(env, &maxdigits)) { @@ -1797,7 +1794,7 @@ config_init_int_max_str_digits(PyConfig *config) STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) " or 0 for unlimited."); } - config->int_max_str_digits = maxdigits; + _Py_global_config_int_max_str_digits = maxdigits; } const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); @@ -1816,7 +1813,7 @@ config_init_int_max_str_digits(PyConfig *config) #undef _STRINGIFY #undef STRINGIFY } - config->int_max_str_digits = maxdigits; + _Py_global_config_int_max_str_digits = maxdigits; } return _PyStatus_OK(); } @@ -1884,7 +1881,7 @@ config_read_complex_options(PyConfig *config) } } - if (config->int_max_str_digits < 0) { + if (_Py_global_config_int_max_str_digits < 0) { status = config_init_int_max_str_digits(config); if (_PyStatus_EXCEPTION(status)) { return status; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 43109445e75d10..a33abb2a8412db 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2777,7 +2777,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags) 
SetFlag(preconfig->utf8_mode); SetFlag(config->warn_default_encoding); SetFlagObj(PyBool_FromLong(config->safe_path)); - SetFlag(config->int_max_str_digits); + SetFlag(_Py_global_config_int_max_str_digits); #undef SetFlagObj #undef SetFlag return 0; From 0c83111439b04784031feb4c5c0d2421f6e687f6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Mon, 8 Aug 2022 00:46:49 +0000 Subject: [PATCH 22/45] Remove the Tools/scripts/ example and timing code. --- Tools/scripts/intdoshttpserver.py | 19 ------------ Tools/scripts/intdostimeit.py | 48 ------------------------------- 2 files changed, 67 deletions(-) delete mode 100644 Tools/scripts/intdoshttpserver.py delete mode 100644 Tools/scripts/intdostimeit.py diff --git a/Tools/scripts/intdoshttpserver.py b/Tools/scripts/intdoshttpserver.py deleted file mode 100644 index 710b0ec244d3c1..00000000000000 --- a/Tools/scripts/intdoshttpserver.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 -import http.server - - -class IntDosRequestHandler(http.server.BaseHTTPRequestHandler): - content_length_digits = 5 - cookie_version_digits = 40_000 - - def do_GET(self): - self.send_response(200) - self.send_header("Content-Type", "text/plain") - self.send_header("Content-Length", "1" + ("0" * self.content_length_digits)) - self.send_header("Cookie", "version=1" + ("0" * self.cookie_version_digits)) - self.end_headers() - self.wfile.write(b"Really long content-length") - - -if __name__ == "__main__": - http.server.test(HandlerClass=IntDosRequestHandler) diff --git a/Tools/scripts/intdostimeit.py b/Tools/scripts/intdostimeit.py deleted file mode 100644 index 9e9f1cfef780ea..00000000000000 --- a/Tools/scripts/intdostimeit.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -""" - -Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz - -100 digits -1000000 loops, best of 5: 371 nsec per loop -1000 digits -50000 loops, best of 5: 7.94 usec per loop -5000 digits -2000 loops, best of 5: 142 usec per loop -10000 digits -500 loops, 
best of 5: 543 usec per loop -25000 digits -100 loops, best of 5: 3.31 msec per loop -50000 digits -20 loops, best of 5: 12.8 msec per loop -100000 digits -5 loops, best of 5: 52.4 msec per loop -250000 digits -1 loop, best of 5: 318 msec per loop -500000 digits -1 loop, best of 5: 1.27 sec per loop -1000000 digits -1 loop, best of 5: 5.2 sec per loop -""" -import sys -import timeit - -unlimiter = getattr(sys, "set_int_max_str_digits", lambda _: None) -unlimiter(0) # Disable the limit when running on a limited interpreter. - -for i in [ - 100, - 1_000, - 5_000, - 10_000, - 25_000, - 50_000, - 100_000, - 250_000, - 500_000, - 1_000_000, -]: - print(f"{i} digits") - timeit.main( - ["-s", f"s = '1' + ('0' * {i})", "int(s)",] - ) From 5d39ab632258f2d76ca8125baff7936239874641 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Mon, 8 Aug 2022 04:14:00 +0000 Subject: [PATCH 23/45] un-add the include (not needed for PR anymore) --- Objects/longobject.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 2c623acbed5ecc..e71e80cac303cc 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -13,7 +13,6 @@ #include #include -#include #include #include // abs() From 5b77b3ea4cf2add30128837408c04787ba480373 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 8 Aug 2022 00:30:08 -0700 Subject: [PATCH 24/45] Remove added unused imports. 
--- Lib/test/test_cmd_line.py | 1 - Lib/test/test_int.py | 1 - Lib/test/test_json/test_decode.py | 1 - 3 files changed, 3 deletions(-) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index aebcbe4a217464..db967088804ae2 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -3,7 +3,6 @@ # See test_cmd_line_script.py for testing of script execution import os -import re import subprocess import sys import tempfile diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index bb42fb847df21f..ef8c04efdb2c6f 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,3 @@ -from math import log import sys import unittest diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 6ae285fe5a8822..618d72eecc9fda 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -3,7 +3,6 @@ from collections import OrderedDict from test.test_json import PyTest, CTest from test import support -import sys class TestDecode: From de00cdc4d8e556e194b950de565c3e3b06c0e07a Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google LLC]" Date: Mon, 8 Aug 2022 00:30:22 -0700 Subject: [PATCH 25/45] Tabs -> Spaces --- Objects/longobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index e71e80cac303cc..04cf71fe4c3444 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1820,11 +1820,11 @@ long_to_decimal_string_internal(PyObject *aa, if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { PyInterpreterState *interp = _PyInterpreterState_GET(); int max_str_digits = interp->int_max_str_digits; - Py_ssize_t strlen_nosign = strlen - negative; + Py_ssize_t strlen_nosign = strlen - negative; if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - strlen_nosign); + strlen_nosign); return -1; } } @@ -2436,7 +2436,7 @@ digit beyond the first. ***/ twodigits c; /* current input character */ Py_ssize_t size_z; - Py_ssize_t digits = 0; // Number of base $base digits in str. + Py_ssize_t digits = 0; int i; int convwidth; twodigits convmultmax, convmult; From 3cc8553e75f7f135cccbf693a710e96b16590d72 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 8 Aug 2022 01:23:07 -0700 Subject: [PATCH 26/45] make html and make doctest in Doc pass. --- Doc/library/stdtypes.rst | 2 +- .../2022-08-07-16-53.gh-issue-95778.ch010gps.rst} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename Misc/NEWS.d/next/{Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst => Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst} (100%) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 74285b262c91c4..4eb75c4025ff5a 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5491,7 +5491,7 @@ When an operation exceeds the limit, a :exc:`ValueError` is raised:: ValueError: Exceeds digit limit for string conversions: value has 2049 digits. 
This limit offers a practical way to avoid `CVE-2020-10735 -https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. .. versionadded:: 3.12 diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst similarity index 100% rename from Misc/NEWS.d/next/Core and Builtins/2022-08-07-16-53.CVE-2020-10735.ch010gps.rst rename to Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst From da97e659e2258b6db720ed45e0099a0ecabdb2b9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Wed, 10 Aug 2022 00:26:05 -0700 Subject: [PATCH 27/45] Raise the default limit and the threshold. Intended to minimize disruption. Based on real world testing across a huge Python code base including a large corpus of open source project test suites. --- Include/internal/pycore_long.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index e3fb3b2515f230..76c9a5fcecfabc 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -12,27 +12,34 @@ extern "C" { #include "pycore_runtime.h" // _PyRuntime /* - * Default int base conversion size limitation. + * Default int base conversion size limitation: Denial of Service prevention. * - * Chosen such that this isn't wildly slow on modern hardware: - * % python -m timeit -s 's = "1"*2000; v = int(s)' 'str(int(s))' - * 2000 loops, best of 5: 100 usec per loop + * Chosen such that this isn't wildly slow on modern hardware and so that + * everyone's existing deployed numpy test suite passes before + * https://github.com/numpy/numpy/issues/22098 is widely available. * - * 2000 decimal digits fits a ~6643 bit number.
+ * $ python -m timeit -s 's = * "1"*4300' 'int(s)' + * 2000 loops, best of 5: 125 usec per loop + * $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)' + * 1000 loops, best of 5: 311 usec per loop + * (zen2 cloud VM) + * + * 4300 decimal digits fits a ~14284 bit number. */ -#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 2000 +#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300 /* * Threshold for max digits check. For performance reasons int() and - * int.__str__ don't checks values that are smaller than this + * int.__str__() don't checks values that are smaller than this * threshold. Acts as a guaranteed minimum size limit for bignums that * applications can expect from CPython. * - * % python -m timeit -s 's = "1"*333; v = int(s)' 'str(int(s))' - * 100000 loops, best of 5: 3.94 usec per loop + * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))' + * 20000 loops, best of 5: 12 usec per loop * - * 333 decimal digits fits a ~1106 bit number. + * "640 digits should be enough for anyone." - gps + * fits a ~2126 bit decimal number. */ -#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 333 +#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640 #if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) From ef03a16c054e094ab6b4b5754f000ff2fc333b16 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Fri, 12 Aug 2022 15:49:17 -0700 Subject: [PATCH 28/45] Remove xmlrpc.client changes, test-only. These aren't quite right and belong in their own feature request anyways. The test validates that our overall limit triggers to acknowledge that the protection is desirable here. 
--- Lib/test/test_xmlrpc.py | 12 ++++++------ Lib/xmlrpc/client.py | 10 +--------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 646ced0f003dde..5be964a0acd7e4 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -291,12 +291,12 @@ def test_load_extension_types(self): def test_limit_int(self): check = self.check_loads - with self.assertRaises(OverflowError): - check('123456780123456789', None) - with self.assertRaises(ValueError): - maxdigits = 5000 - with support.set_int_max_str_digits(maxdigits): - s = '1' * (maxdigits + 1) + maxdigits = 5000 + with support.set_int_max_str_digits(maxdigits): + s = '1' * (maxdigits + 1) + with self.assertRaises(ValueError): + check(f'{s}', None) + with self.assertRaises(ValueError): check(f'{s}', None) def test_get_host_info(self): diff --git a/Lib/xmlrpc/client.py b/Lib/xmlrpc/client.py index 1ba11b2db9e75d..bef23f4505e03c 100644 --- a/Lib/xmlrpc/client.py +++ b/Lib/xmlrpc/client.py @@ -742,22 +742,14 @@ def end_boolean(self, data): dispatch["boolean"] = end_boolean def end_int(self, data): - if len(data.strip()) > 16: - # XML-RPC ints are signed int32 with 11 chars text max - raise OverflowError("int exceeds XML-RPC limits") self.append(int(data)) self._value = 0 - dispatch["i1"] = end_int dispatch["i2"] = end_int dispatch["i4"] = end_int dispatch["i8"] = end_int dispatch["int"] = end_int - - def end_bigint(self, data): - self.append(int(data)) - self._value = 0 - dispatch["biginteger"] = end_bigint + dispatch["biginteger"] = end_int def end_double(self, data): self.append(float(data)) From e9168454e4719e7a032e0de36eecb1b2525556b4 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Fri, 12 Aug 2022 17:25:50 -0700 Subject: [PATCH 29/45] Rearrange the new stdtypes docs, w/limits + caution. 
--- Doc/library/stdtypes.rst | 84 ++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 4eb75c4025ff5a..123d7c1cb8bbea 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5478,8 +5478,9 @@ The limit value is based on the number of digit characters in the input or output string. That means that higher bases can process larger numbers before the limit triggers. Underscores and the sign are not counted towards the limit. -When an operation exceeds the limit, a :exc:`ValueError` is raised:: +When an operation would exceed the limit, a :exc:`ValueError` is raised:: + >>> import sys >>> sys.set_int_max_str_digits(2048) >>> i = 10 ** 2047 >>> len(str(i)) @@ -5493,8 +5494,44 @@ When an operation exceeds the limit, a :exc:`ValueError` is raised:: This limit offers a practical way to avoid `CVE-2020-10735 <https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. +The default limit is 4300 digits as seen in +:data:`sys.int_info.default_max_str_digits <sys.int_info>`. The smallest limit +is 640 digits as seen in :data:`sys.int_info.str_digits_check_threshold +<sys.int_info>`. + +Verification:: + + >>> import sys + >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info + >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info + >>> msg = int('379431350246233136746328250873855212517275894113083563419' + ... '189439726510664285115764751963294910345124558029500996970' + ... '709334425217234291').to_bytes(55, 'big') + .. versionadded:: 3.12 +Affected APIs +------------- + +The limition only applies to potentially slow conversions between :class:`int` +and :class:`str`: + +* ``int(string)`` with default base 10. +* ``int(string, base)`` for all bases that are not a power of 2. +* ``str(integer)``. +* ``repr(integer)`` +* any other string conversion to base 10, for example ``f"{integer}"``, + ``"{}".format(integer)``, or ``"%d" % integer``.
+ +The limitations do not apply to functions with a linear algorithm: + +* ``int(string, base)`` with base 2, 4, 8, 16, or 32. +* :func:`int.from_bytes` and :func:`int.to_bytes`. +* :func:`hex`, :func:`oct`, :func:`bin`. +* :ref:`formatspec` for hex, octal, and binary numbers. +* :class:`str` to :class:`float`. +* :class:`str` to :class:`decimal.Decimal`. + Configuring the limit --------------------- @@ -5502,10 +5539,10 @@ Before Python starts up you can use an environment variable or an interpreter command line flag to configure the limit: * :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. - ``PYTHONINTMAXSTRDIGITS=4321 python3`` to set the limit to ``4321`` or + ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to ``640`` or ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. * :option:`-X int_max_str_digits <-X>`, e.g. - ``python3 -X int_max_str_digits=4321`` + ``python3 -X int_max_str_digits=640`` * :data:`sys.flags.int_max_str_digits` contains the value of :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`. If both the env var and the ``-X`` option are set, the ``-X`` option takes @@ -5526,42 +5563,41 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: * :data:`sys.int_info.str_digits_check_threshold ` is the minimum accepted value for the limit. -Affected APIs -------------- +.. versionadded:: 3.12 -The limition only applies to potentially slow conversions between :class:`int` -and :class:`str`: +.. caution:: -* ``int(string)`` with default base 10. -* ``int(string, base)`` for all bases that are not a power of 2. -* ``str(integer)``. -* ``repr(integer)`` -* any other string conversion to base 10, for example ``f"{integer}"``, - ``"{}".format(integer)``, or ``"%d" % integer``. + Setting a low limit can lead to problems. While rare, code exists that + contains integer constants in decimal in their source that exceed the + minimum threshold. 
A consequence of setting the limit is that Python source + code containing decimal integer literals longer than the limit will + encounter a ValueError during compilation, usually at startup time or import + time or even at installation time - anytime an up to date ``.pyc`` does not + already exist for the code. A workaround for source that contains such large + constants is to convert them to ``0x`` hexidecimal form as it has no limit. -The limitations do not apply to functions with a linear algorithm: - -* ``int(string, base)`` with base 2, 4, 8, 16, or 32. -* :func:`int.from_bytes` and :func:`int.to_bytes`. -* :func:`hex`, :func:`oct`, :func:`bin`. -* :ref:`formatspec` for hex, octal, and binary numbers. -* :class:`str` to :class:`float`. -* :class:`str` to :class:`decimal.Decimal`. + Test your application thoroughly if you use a low limit. Ensure your tests + run with the limit set early via the environment or flag so that it applies + during startup and even during any installation step that may precompile + source to ``.pyc`` files. Recommended configuration ------------------------- The default :data:`sys.int_info.default_max_str_digits` is expected to be reasonable for most applications. If your application requires a different -limit, use Python version and implementation agnostic code to set it. +limit, use Python version and implementation agnostic code to set it from your +main entry point as these APIs were added in later patch releases before 3.12. Example:: >>> import sys >>> if hasattr(sys, "set_int_max_str_digits"): ... current_limit = sys.get_int_max_str_digits() - ... if not current_limit or current_limit > 4321: - ... sys.set_int_max_str_digits(4321) + ... if not current_limit or current_limit > 8088: + ... sys.set_int_max_str_digits(8088) + +If you need to disable it entirely, set it to ``0``. .. rubric:: Footnotes From 101502e3514fac060912137ada707a61365f56a6 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google LLC]" Date: Tue, 16 Aug 2022 01:11:40 -0700 Subject: [PATCH 30/45] Make a huge int a SyntaxError with lineno when parsing. --- Lib/test/test_ast.py | 8 ++++++++ Lib/test/test_compile.py | 13 +++++++++++++ Objects/longobject.c | 2 +- Parser/pegen.c | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 4cfefe4ac3dd1d..14b7df630f884c 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1136,6 +1136,14 @@ def test_literal_eval(self): self.assertRaises(ValueError, ast.literal_eval, '+True') self.assertRaises(ValueError, ast.literal_eval, '2+3') + def test_literal_eval_str_int_limit(self): + with support.set_int_max_str_digits(4000): + ast.literal_eval('3'*4000) # no error + with self.assertRaises(SyntaxError) as err_ctx: + ast.literal_eval('3'*4001) + self.assertIn('Exceeds digit limit', str(err_ctx.exception)) + self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) + def test_literal_eval_complex(self): # Issue #4907 self.assertEqual(ast.literal_eval('6j'), 6j) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index d7b78f686ef88d..71ea668b29ab7a 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -199,6 +199,19 @@ def test_literals_with_leading_zeroes(self): self.assertEqual(eval("0o777"), 511) self.assertEqual(eval("-0o0000010"), -8) + def test_int_literals_too_long(self): + n = 3000 + source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4" + with support.set_int_max_str_digits(n): + compile(source, "", "exec") # no errors. 
+ with support.set_int_max_str_digits(n-1): + with self.assertRaises(SyntaxError) as err_ctx: + compile(source, "", "exec") + exc = err_ctx.exception + self.assertEqual(exc.lineno, 3) + self.assertIn('Exceeds digit limit', str(exc)) + self.assertIn(' Consider hexidecimal ', str(exc)) + def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 if sys.maxsize == 2147483647: diff --git a/Objects/longobject.c b/Objects/longobject.c index 04cf71fe4c3444..6367ef0c346847 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -36,7 +36,7 @@ medium_value(PyLongObject *x) #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS) -#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds digit limit for string conversions: value has %zd digits." +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds digit limit for string conversions: value has %zd digits" static inline void _Py_DECREF_INT(PyLongObject *op) diff --git a/Parser/pegen.c b/Parser/pegen.c index 31e3ec05a10530..f0b9fd0fea0f74 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1,5 +1,6 @@ #include #include "pycore_ast.h" // _PyAST_Validate(), +#include "pycore_pystate.h" // _PyThreadState_GET() #include #include "tokenizer.h" @@ -645,6 +646,21 @@ _PyPegen_number_token(Parser *p) if (c == NULL) { p->error_indicator = 1; + PyThreadState *tstate = _PyThreadState_GET(); + // The only way a ValueError should happen in _this_ code is via + // PyLong_FromString hitting a length limit. + if (tstate->curexc_type == PyExc_ValueError && + tstate->curexc_value != NULL) { + /* Intentionally omitting columns to avoid a giant wall of ^s on + * the error message. Nobody is going to overlook their huge + * numeric literal once given the line. 
*/ + return RAISE_ERROR_KNOWN_LOCATION( + p, PyExc_SyntaxError, + t->lineno, -1 /* col_offset */, + t->end_lineno, -1 /* end_col_offset */, + "%S - Consider hexidecimal for huge integer literals to avoid decimal conversion limits.", + tstate->curexc_value); + } return NULL; } From fa8a58af285735e0032cddd43d79c31646a7d8ef Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 16 Aug 2022 11:13:37 -0700 Subject: [PATCH 31/45] Mention the chosen default in the NEWS entry. --- .../next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index 9194c5c1ff35a4..e21f9393f228c6 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -6,4 +6,4 @@ mitigation for `CVE-2020-10735 This new limit can be configured or disabled by environment variable, command line flag, or :mod:`sys` APIs. See the :ref:`int maximum digits limitation -` docs. +` docs. The default is 4300 digits. From 313ab6d5ca1ab40144c0d3a590c9ee633798ce1b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 16 Aug 2022 11:47:36 -0700 Subject: [PATCH 32/45] Properly clear & free the prior exception. --- Parser/pegen.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Parser/pegen.c b/Parser/pegen.c index f0b9fd0fea0f74..efcbcf2872b865 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -651,15 +651,22 @@ _PyPegen_number_token(Parser *p) // PyLong_FromString hitting a length limit. if (tstate->curexc_type == PyExc_ValueError && tstate->curexc_value != NULL) { - /* Intentionally omitting columns to avoid a giant wall of ^s on - * the error message. 
Nobody is going to overlook their huge + PyObject *type, *value, *tb; + // This acts as PyErr_Clear() as we're replacing curexc. + PyErr_Fetch(&type, &value, &tb); + Py_XDECREF(tb); + Py_DECREF(type); + /* Intentionally omitting columns to avoid a wall of 1000s of '^'s + * on the error message. Nobody is going to overlook their huge * numeric literal once given the line. */ - return RAISE_ERROR_KNOWN_LOCATION( + RAISE_ERROR_KNOWN_LOCATION( p, PyExc_SyntaxError, t->lineno, -1 /* col_offset */, t->end_lineno, -1 /* end_col_offset */, - "%S - Consider hexidecimal for huge integer literals to avoid decimal conversion limits.", - tstate->curexc_value); + "%S - Consider hexidecimal for huge integer literals " + "to avoid decimal conversion limits.", + value); + Py_DECREF(value); } return NULL; } From 614cd022de132949a158955e2882f38777b77fef Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 16 Aug 2022 23:05:37 -0700 Subject: [PATCH 33/45] Add a note to the float.as_integer_ratio() docs. --- Doc/library/stdtypes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 123d7c1cb8bbea..06d93df090afe4 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -622,6 +622,12 @@ class`. float also has the following additional methods. :exc:`OverflowError` on infinities and a :exc:`ValueError` on NaNs. + .. note:: + + The integers returned by ``as_integer_ratio()`` can be huge. Attempts + to render such integers into decimal strings may bump into the + :ref:`int maximum digits limitation `. + .. method:: float.is_integer() Return ``True`` if the float instance is finite with integral From 16ad0902441d9aaf485e50532bd0143579019fe7 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Wed, 17 Aug 2022 15:20:21 -0700 Subject: [PATCH 34/45] Clarify the documentation wording and error msg. 
--- Doc/library/functions.rst | 6 +- Doc/library/json.rst | 8 +- Doc/library/stdtypes.rst | 86 +++++++++++-------- Doc/library/sys.rst | 10 +-- Doc/using/cmdline.rst | 12 +-- Doc/whatsnew/3.12.rst | 17 ++-- Lib/test/test_ast.py | 2 +- Lib/test/test_compile.py | 2 +- ...22-08-07-16-53.gh-issue-95778.ch010gps.rst | 14 +-- Objects/longobject.c | 6 +- 10 files changed, 93 insertions(+), 70 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index f926e4360264b8..b9cf02e87eb614 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -914,9 +914,9 @@ are always available. They are listed here in alphabetical order. :class:`int` string inputs and string representations can be limited to help avoid denial of service attacks. A :exc:`ValueError` is raised when the limit is exceeded while converting a string *x* to an :class:`int` or - when converting a :class:`int` into a string would exceed the limit. See - :ref:`int maximum digits limitation ` for more - information. + when converting an :class:`int` into a string would exceed the limit. + See the :ref:`integer string conversion length limitation + ` documentation. .. function:: isinstance(object, classinfo) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 02a422b433e5e7..d05d62e78cc71e 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -259,10 +259,10 @@ Basic Usage (e.g. :class:`float`). .. versionchanged:: 3.12 - The default implementation of *parse_int* limits the maximum length of - the integer string via the interpreter's :ref:`int maximum digits - limitation ` mechanism to help avoid denial of - service attacks. + The default *parse_int* of :func:`int` now limits the maximum length of + the integer string via the interpreter's :ref:`integer string + conversion length limitation ` to help avoid denial + of service attacks. 
*parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 06d93df090afe4..71062b6d681d29 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -624,9 +624,10 @@ class`. float also has the following additional methods. .. note:: - The integers returned by ``as_integer_ratio()`` can be huge. Attempts + The values returned by ``as_integer_ratio()`` can be huge. Attempts to render such integers into decimal strings may bump into the - :ref:`int maximum digits limitation `. + :ref:`integer string conversion length limitation + `. .. method:: float.is_integer() @@ -5468,37 +5469,47 @@ types, where they are relevant. Some of these are not reported by the .. _int_max_str_digits: -Integer maximum digits limitation -================================= +Integer string conversion length limitation +=========================================== CPython has a global limit for converting between :class:`int` and :class:`str` -to mitigate denial of service attacks. The limit is necessary because CPython's -integer type is an abitrary length number (commonly known as a bignum) stored -in binary form. There exists no algorithm that can convert a string to a binary -integer or a binary integer to a string in linear time, unless the base is a -power of *2*. Even the best known algorithms for base *10* have sub-quadratic -complexity. Converting a large value such as ``int('1' * 500_000)`` can take -over a second on a fast CPU. - -The limit value is based on the number of digit characters in the input or -output string. That means that higher bases can process larger numbers before -the limit triggers. Underscores and the sign are not counted towards the limit. +to mitigate denial of service attacks. This limit *only* applies to +non-power-of-two number bases such as decimal. Hexidecimal, octal, and binary +are not limited. 
The limit can be configured. + +The limit is necessary because CPython's integer type is an abitrary length +number (commonly known as a bignum) stored in binary form. There exists no +algorithm that can convert a string to a binary integer or a binary integer to +a string in linear time, unless the base is a power of 2. Even the best known +algorithms for base 10 have sub-quadratic complexity. Converting a large value +such as ``int('1' * 500_000)`` can take over a second on a fast CPU. + +Limiting conversion size offers a practical way to avoid `CVE-2020-10735 +`_. + +The limit is applied to the number of digit characters in the input or output +string. That means that higher bases can process larger numbers before the +limit triggers. Underscores and the sign are not counted towards the limit. When an operation would exceed the limit, a :exc:`ValueError` is raised:: >>> import sys - >>> sys.set_int_max_str_digits(2048) - >>> i = 10 ** 2047 - >>> len(str(i)) - 2048 - >>> i = 10 ** 2048 + >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. + >>> _ = int('2' * 5432) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits. + >>> i = int('2' * 4300) >>> len(str(i)) + 4300 + >>> i_squared = i*i + >>> len(str(i_squared)) Traceback (most recent call last): ... - ValueError: Exceeds digit limit for string conversions: value has 2049 digits. - -This limit offers a practical way to avoid `CVE-2020-10735 -`_. + ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. + >>> len(hex(i_squared)) + 7144 + >>> assert int(hex(i_squared), base=16) == i The default limit is 4300 digits as seen in :data:`sys.int_info.default_max_str_digits `. 
The smallest limit @@ -5510,9 +5521,10 @@ Verification:: >>> import sys >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info - >>> msg = int('379431350246233136746328250873855212517275894113083563419' - ... '189439726510664285115764751963294910345124558029500996970' - ... '709334425217234291').to_bytes(55, 'big') + >>> msg = int('578966293710682886880994035146873798396722250538762761564' + ... '9252925514383915483333812743580549779436104706260696366600' + ... '571186405732').to_bytes(53, 'big') + ... .. versionadded:: 3.12 @@ -5573,13 +5585,13 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: .. caution:: - Setting a low limit can lead to problems. While rare, code exists that + Setting a low limit *can* lead to problems. While rare, code exists that contains integer constants in decimal in their source that exceed the minimum threshold. A consequence of setting the limit is that Python source code containing decimal integer literals longer than the limit will - encounter a ValueError during compilation, usually at startup time or import - time or even at installation time - anytime an up to date ``.pyc`` does not - already exist for the code. A workaround for source that contains such large + encounter an error during parsing, usually at startup time or import time or + even at installation time - anytime an up to date ``.pyc`` does not already + exist for the code. A workaround for source that contains such large constants is to convert them to ``0x`` hexidecimal form as it has no limit. Test your application thoroughly if you use a low limit. Ensure your tests @@ -5592,16 +5604,20 @@ Recommended configuration The default :data:`sys.int_info.default_max_str_digits` is expected to be reasonable for most applications. 
If your application requires a different -limit, use Python version and implementation agnostic code to set it from your -main entry point as these APIs were added in later patch releases before 3.12. +limit, set it from your main entry point using Python version agnostic code as +these APIs were added in patch releases before 3.12. Example:: >>> import sys >>> if hasattr(sys, "set_int_max_str_digits"): + ... upper_bound = 68000 + ... lower_bound = 4004 ... current_limit = sys.get_int_max_str_digits() - ... if not current_limit or current_limit > 8088: - ... sys.set_int_max_str_digits(8088) + ... if current_limit == 0 or current_limit > upper_bound: + ... sys.set_int_max_str_digits(upper_bound) + ... elif current_limit < lower_bound: + ... sys.set_int_max_str_digits(lower_bound) If you need to disable it entirely, set it to ``0``. diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 1be19794ed1946..7a7bbac7d67e24 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -521,7 +521,7 @@ always available. :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) :const:`utf8_mode` :option:`-X utf8 <-X>` :const:`safe_path` :option:`-P` - :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`int maximum digits limitation `) + :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) ================================== ====================================================================================================== .. versionchanged:: 3.2 @@ -729,8 +729,8 @@ always available. .. function:: get_int_max_str_digits() - Return current global value for :ref:`int maximum digits limitation - `. See also :func:`set_int_max_str_digits` + Return current global value for :ref:`integer string conversion length + limitation `. See also :func:`set_int_max_str_digits` .. versionadded:: 3.12 @@ -1333,8 +1333,8 @@ always available. .. 
function:: set_int_max_str_digits(n) - Set global interpreter limit for :ref:`int maximum digits limitation - `. See also :func:`get_int_max_str_digits` + Set global interpreter limit for :ref:`integer string conversion length + limitation `. See also :func:`get_int_max_str_digits` .. versionadded:: 3.12 diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 40048f076f0642..6a33d98a059a09 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -505,8 +505,9 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. - * ``-X int_max_str_digits`` configures :ref:`int maximum digits limitation - `. See also :envvar:`PYTHONINTMAXSTRDIGITS`. + * ``-X int_max_str_digits`` configures the :ref:`integer string conversion + length limitation `. See also + :envvar:`PYTHONINTMAXSTRDIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded @@ -770,10 +771,11 @@ conflict. .. envvar:: PYTHONINTMAXSTRDIGITS - If this variable is set to an integer, it is used to configure the interpreter's - global :ref:`int maximum digits limitation `. + If this variable is set to an integer, it is used to configure the + interpreter's global :ref:`integer string conversion length limitation + `. - .. versionadded:: 3.11 + .. versionadded:: 3.12 .. envvar:: PYTHONIOENCODING diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 03193d0b75cc0a..e6dd66f3d2a6a7 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -83,13 +83,16 @@ Other Language Changes mapping is hashable. (Contributed by Serhiy Storchaka in :gh:`87995`.) 
-* Converting between :class:`int` and :class:`str` in non binary multiple bases - such as human friendly base 10 now limits the maximum number of string digits - by default to avoid potential denial of service attacks. This is a mitigation - for `CVE-2020-10735 - `_. The limit - can be configured by environment variable, command line flag, or :mod:`sys` - APIs. See the :ref:`int maximum digits limitation ` docs. +* Converting between :class:`int` and :class:`str` in bases other than 2 + (binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) + now raises a :exc:`ValueError` if the number of digits in string form is + above a limit to avoid potential denial of service attacks due to the + algorithmic complexity. This is a mitigation for `CVE-2020-10735 + `_. + This limit can be configured or disabled by environment variable, command + line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion + length limitation ` documentation. The default limit + is 4300 digits in string form. 
New Modules diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 14b7df630f884c..3e5a728651a29d 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1141,7 +1141,7 @@ def test_literal_eval_str_int_limit(self): ast.literal_eval('3'*4000) # no error with self.assertRaises(SyntaxError) as err_ctx: ast.literal_eval('3'*4001) - self.assertIn('Exceeds digit limit', str(err_ctx.exception)) + self.assertIn('Exceeds the limit ', str(err_ctx.exception)) self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) def test_literal_eval_complex(self): diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 71ea668b29ab7a..f75d60f7549bc2 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -209,7 +209,7 @@ def test_int_literals_too_long(self): compile(source, "", "exec") exc = err_ctx.exception self.assertEqual(exc.lineno, 3) - self.assertIn('Exceeds digit limit', str(exc)) + self.assertIn('Exceeds the limit ', str(exc)) self.assertIn(' Consider hexidecimal ', str(exc)) def test_unary_minus(self): diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index e21f9393f228c6..a69e879df7574b 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -1,9 +1,11 @@ -Converting between :class:`int` and :class:`str` in non binary multiple bases -(2, 4, 8, 16, & 32) such as base 10 now limits the maximum number of string -digits by default to avoid potential denial of service attacks. 
This is a -mitigation for `CVE-2020-10735 +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) now +raises a :exc:`ValueError` if the number of digits in string form is above a +limit to avoid potential denial of service attacks due to the algorithmic +complexity. This is a mitigation for `CVE-2020-10735 `_. This new limit can be configured or disabled by environment variable, command -line flag, or :mod:`sys` APIs. See the :ref:`int maximum digits limitation -` docs. The default is 4300 digits. +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length +limitation ` documentation. The default limit is 4300 +digits in string form. diff --git a/Objects/longobject.c b/Objects/longobject.c index 6367ef0c346847..6c6e2ea919682e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -36,7 +36,7 @@ medium_value(PyLongObject *x) #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS) -#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds digit limit for string conversions: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" static inline void _Py_DECREF_INT(PyLongObject *op) @@ -1824,7 +1824,7 @@ long_to_decimal_string_internal(PyObject *aa, if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - strlen_nosign); + max_str_digits, strlen_nosign); return -1; } } @@ -2499,7 +2499,7 @@ digit beyond the first. int max_str_digits = interp->int_max_str_digits; if ((max_str_digits > 0) && (digits > max_str_digits)) { PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - digits); + max_str_digits, digits); return NULL; } } From 4eb72e6ce5571188e51061479feda9830192a1c1 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google LLC]" Date: Wed, 17 Aug 2022 15:54:52 -0700 Subject: [PATCH 35/45] Fix test_idle, it used a long int on a line. --- Lib/idlelib/idle_test/test_sidebar.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/idlelib/idle_test/test_sidebar.py b/Lib/idlelib/idle_test/test_sidebar.py index 290e037fe727a1..27e5c8c071dab1 100644 --- a/Lib/idlelib/idle_test/test_sidebar.py +++ b/Lib/idlelib/idle_test/test_sidebar.py @@ -6,6 +6,7 @@ import unittest import unittest.mock from test.support import requires, swap_attr +from test import support import tkinter as tk from idlelib.idle_test.tkinter_testing_utils import run_in_tk_mainloop @@ -612,7 +613,8 @@ def test_interrupt_recall_undo_redo(self): @run_in_tk_mainloop() def test_very_long_wrapped_line(self): - with swap_attr(self.shell, 'squeezer', None): + with support.set_int_max_str_digits(11_111), \ + swap_attr(self.shell, 'squeezer', None): self.do_input('x = ' + '1'*10_000 + '\n') yield self.assertEqual(self.get_sidebar_lines(), ['>>>']) From da365505c26d97eea8a607e9ed36a946974590a8 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 18 Aug 2022 18:33:18 -0700 Subject: [PATCH 36/45] Rename the test.support context manager and document it. --- Doc/library/test.rst | 10 ++++++++++ Lib/idlelib/idle_test/test_sidebar.py | 2 +- Lib/test/support/__init__.py | 4 ++-- Lib/test/test_ast.py | 2 +- Lib/test/test_compile.py | 4 ++-- Lib/test/test_int.py | 2 +- Lib/test/test_json/test_decode.py | 2 +- Lib/test/test_xmlrpc.py | 2 +- 8 files changed, 19 insertions(+), 9 deletions(-) diff --git a/Doc/library/test.rst b/Doc/library/test.rst index f3bc7e7560a66c..eff375132318a9 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -1011,6 +1011,16 @@ The :mod:`test.support` module defines the following functions: .. versionadded:: 3.10 +.. 
function:: adjust_int_max_str_digits(max_digits) + + This function returns a context manager that will change the global + :func:`sys.set_int_max_str_digits` setting for the duration of the + context to allow execution of test code that needs a different limit + on the number of digits when converting between an integer and string. + + .. versionadded:: 3.12 + + The :mod:`test.support` module defines the following classes: diff --git a/Lib/idlelib/idle_test/test_sidebar.py b/Lib/idlelib/idle_test/test_sidebar.py index 27e5c8c071dab1..049531e66a414e 100644 --- a/Lib/idlelib/idle_test/test_sidebar.py +++ b/Lib/idlelib/idle_test/test_sidebar.py @@ -613,7 +613,7 @@ def test_interrupt_recall_undo_redo(self): @run_in_tk_mainloop() def test_very_long_wrapped_line(self): - with support.set_int_max_str_digits(11_111), \ + with support.adjust_int_max_str_digits(11_111), \ swap_attr(self.shell, 'squeezer', None): self.do_input('x = ' + '1'*10_000 + '\n') yield diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 6142e6d3094532..573dce52ca474a 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2344,8 +2344,8 @@ def sleeping_retry(timeout, err_msg=None, /, @contextlib.contextmanager -def set_int_max_str_digits(max_digits): - """Temporarily change the int<->str maximum digits limit.""" +def adjust_int_max_str_digits(max_digits): + """Temporarily change the integer string conversion length limit.""" current = sys.get_int_max_str_digits() try: sys.set_int_max_str_digits(max_digits) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 3e5a728651a29d..942d67e8a2eaa2 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1137,7 +1137,7 @@ def test_literal_eval(self): self.assertRaises(ValueError, ast.literal_eval, '2+3') def test_literal_eval_str_int_limit(self): - with support.set_int_max_str_digits(4000): + with support.adjust_int_max_str_digits(4000): ast.literal_eval('3'*4000) # no error with 
self.assertRaises(SyntaxError) as err_ctx: ast.literal_eval('3'*4001) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index f75d60f7549bc2..9fb119259211a1 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -202,9 +202,9 @@ def test_literals_with_leading_zeroes(self): def test_int_literals_too_long(self): n = 3000 source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4" - with support.set_int_max_str_digits(n): + with support.adjust_int_max_str_digits(n): compile(source, "", "exec") # no errors. - with support.set_int_max_str_digits(n-1): + with support.adjust_int_max_str_digits(n-1): with self.assertRaises(SyntaxError) as err_ctx: compile(source, "", "exec") exc = err_ctx.exception diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index ef8c04efdb2c6f..e9561b02fcac7b 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -593,7 +593,7 @@ def tearDown(self): def test_disabled_limit(self): self.assertGreater(sys.get_int_max_str_digits(), 0) self.assertLess(sys.get_int_max_str_digits(), 20_000) - with support.set_int_max_str_digits(0): + with support.adjust_int_max_str_digits(0): self.assertEqual(sys.get_int_max_str_digits(), 0) i = self.int_class('1' * 20_000) str(i) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 618d72eecc9fda..124045b13184b3 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -98,7 +98,7 @@ def test_negative_index(self): def test_limit_int(self): maxdigits = 5000 - with support.set_int_max_str_digits(maxdigits): + with support.adjust_int_max_str_digits(maxdigits): self.loads('1' * maxdigits) with self.assertRaises(ValueError): self.loads('1' * (maxdigits + 1)) diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 5be964a0acd7e4..9ff5545f786a32 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -292,7 +292,7 @@ def test_load_extension_types(self): def test_limit_int(self): check = 
self.check_loads maxdigits = 5000 - with support.set_int_max_str_digits(maxdigits): + with support.adjust_int_max_str_digits(maxdigits): s = '1' * (maxdigits + 1) with self.assertRaises(ValueError): check(f'{s}', None) From f4372cc5cd6055436c04ab8c13e5bd0f4975d3cd Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 18 Aug 2022 23:17:58 -0700 Subject: [PATCH 37/45] Documentation cleanup. --- Doc/library/stdtypes.rst | 48 ++++++++++++++++++------------------- Doc/library/sys.rst | 51 ++++++++++++++++++++-------------------- 2 files changed, 50 insertions(+), 49 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 71062b6d681d29..4d804ba94b8300 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5473,23 +5473,23 @@ Integer string conversion length limitation =========================================== CPython has a global limit for converting between :class:`int` and :class:`str` -to mitigate denial of service attacks. This limit *only* applies to -non-power-of-two number bases such as decimal. Hexidecimal, octal, and binary -are not limited. The limit can be configured. +to mitigate denial of service attacks. This limit *only* applies to decimal or +other non-power-of-two number bases. Hexidecimal, octal, and binary conversions +are unlimited. The limit can be configured. -The limit is necessary because CPython's integer type is an abitrary length -number (commonly known as a bignum) stored in binary form. There exists no -algorithm that can convert a string to a binary integer or a binary integer to -a string in linear time, unless the base is a power of 2. Even the best known -algorithms for base 10 have sub-quadratic complexity. Converting a large value -such as ``int('1' * 500_000)`` can take over a second on a fast CPU. +The :class:`int` type in CPython is an abitrary length number stored in binary +form (commonly known as a "bignum"). 
There exists no algorithm that can convert +a string to a binary integer or a binary integer to a string in linear time, +*unless* the base is a power of 2. Even the best known algorithms for base 10 +have sub-quadratic complexity. Converting a large value such as ``int('1' * +500_000)`` can take over a second on a fast CPU. Limiting conversion size offers a practical way to avoid `CVE-2020-10735 `_. The limit is applied to the number of digit characters in the input or output -string. That means that higher bases can process larger numbers before the -limit triggers. Underscores and the sign are not counted towards the limit. +string when a non-linear conversion algorithm would be involved. Underscores +and the sign are not counted towards the limit. When an operation would exceed the limit, a :exc:`ValueError` is raised:: @@ -5509,12 +5509,12 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised:: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. >>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i + >>> assert int(hex(i_squared), base=16) == i # Hexidecimal is unlimited. -The default limit is 4300 digits as seen in -:data:`sys.int_info.default_max_str_digits `. The smallest limit -is 640 digits as seen in :data:`sys.int_info.str_digits_check_threshold -`. +The default limit is 4300 digits as provided in +:data:`sys.int_info.default_max_str_digits `. +The lowest limit that can be configured is 640 digits as provided in +:data:`sys.int_info.str_digits_check_threshold `. Verification:: @@ -5532,14 +5532,14 @@ Affected APIs ------------- The limition only applies to potentially slow conversions between :class:`int` -and :class:`str`: +and :class:`str` or :class:`bytes`: * ``int(string)`` with default base 10. * ``int(string, base)`` for all bases that are not a power of 2. * ``str(integer)``. 
* ``repr(integer)`` * any other string conversion to base 10, for example ``f"{integer}"``, - ``"{}".format(integer)``, or ``"%d" % integer``. + ``"{}".format(integer)``, or ``b"%d" % integer``. The limitations do not apply to functions with a linear algorithm: @@ -5557,7 +5557,7 @@ Before Python starts up you can use an environment variable or an interpreter command line flag to configure the limit: * :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. - ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to ``640`` or + ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to 640 or ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. * :option:`-X int_max_str_digits <-X>`, e.g. ``python3 -X int_max_str_digits=640`` @@ -5578,8 +5578,8 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: * :data:`sys.int_info.default_max_str_digits ` is the compiled-in default limit. -* :data:`sys.int_info.str_digits_check_threshold ` is the minimum - accepted value for the limit. +* :data:`sys.int_info.str_digits_check_threshold ` is the lowest + accepted value for the limit (other than 0 which disables it). .. versionadded:: 3.12 @@ -5596,8 +5596,8 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: Test your application thoroughly if you use a low limit. Ensure your tests run with the limit set early via the environment or flag so that it applies - during startup and even during any installation step that may precompile - source to ``.pyc`` files. + during startup and even during any installation step that may invoke Python + to precompile ``.py`` sources to ``.pyc`` files. Recommended configuration ------------------------- @@ -5605,7 +5605,7 @@ Recommended configuration The default :data:`sys.int_info.default_max_str_digits` is expected to be reasonable for most applications. 
If your application requires a different limit, set it from your main entry point using Python version agnostic code as -these APIs were added in patch releases before 3.12. +these APIs were added in security patch releases in versions before 3.12. Example:: diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 7a7bbac7d67e24..cc41b996d2dad5 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -502,27 +502,27 @@ always available. The :term:`named tuple` *flags* exposes the status of command line flags. The attributes are read only. - ================================== ====================================================================================================== - attribute flag - ================================== ====================================================================================================== - :const:`debug` :option:`-d` - :const:`inspect` :option:`-i` - :const:`interactive` :option:`-i` - :const:`isolated` :option:`-I` - :const:`optimize` :option:`-O` or :option:`-OO` - :const:`dont_write_bytecode` :option:`-B` - :const:`no_user_site` :option:`-s` - :const:`no_site` :option:`-S` - :const:`ignore_environment` :option:`-E` - :const:`verbose` :option:`-v` - :const:`bytes_warning` :option:`-b` - :const:`quiet` :option:`-q` - :const:`hash_randomization` :option:`-R` - :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) - :const:`utf8_mode` :option:`-X utf8 <-X>` - :const:`safe_path` :option:`-P` - :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) - ================================== ====================================================================================================== + ============================= ============================================================================================================== + attribute flag + ============================= 
============================================================================================================== + :const:`debug` :option:`-d` + :const:`inspect` :option:`-i` + :const:`interactive` :option:`-i` + :const:`isolated` :option:`-I` + :const:`optimize` :option:`-O` or :option:`-OO` + :const:`dont_write_bytecode` :option:`-B` + :const:`no_user_site` :option:`-s` + :const:`no_site` :option:`-S` + :const:`ignore_environment` :option:`-E` + :const:`verbose` :option:`-v` + :const:`bytes_warning` :option:`-b` + :const:`quiet` :option:`-q` + :const:`hash_randomization` :option:`-R` + :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) + :const:`utf8_mode` :option:`-X utf8 <-X>` + :const:`safe_path` :option:`-P` + :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) + ============================= ============================================================================================================== .. versionchanged:: 3.2 Added ``quiet`` attribute for the new :option:`-q` flag. @@ -729,8 +729,8 @@ always available. .. function:: get_int_max_str_digits() - Return current global value for :ref:`integer string conversion length - limitation `. See also :func:`set_int_max_str_digits` + Returns the current value for the :ref:`integer string conversion length + limitation `. See also :func:`set_int_max_str_digits`. .. versionadded:: 3.12 @@ -1333,8 +1333,9 @@ always available. .. function:: set_int_max_str_digits(n) - Set global interpreter limit for :ref:`integer string conversion length - limitation `. See also :func:`get_int_max_str_digits` + Set the :ref:`integer string conversion length limitation + ` used by this interpreter. See also + :func:`get_int_max_str_digits`. .. versionadded:: 3.12 From c421853ee7be8acf5fbeed3dd4d07eb3573c6ce1 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 25 Aug 2022 16:50:19 -0700 Subject: [PATCH 38/45] Update attribution in Misc/NEWS.d Co-authored-by: Christian Heimes --- .../next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index a69e879df7574b..a205fb31ad7b95 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -9,3 +9,6 @@ This new limit can be configured or disabled by environment variable, command line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length limitation <int_max_str_digits>` documentation. The default limit is 4300 digits in string form. + +Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback from +Victor Stinner, Thomas Wouters, and Steve Dower. From 9f2168ad1695e376aff88de06e9dbcf89f6b9e4a Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 1 Sep 2022 09:37:24 +0200 Subject: [PATCH 39/45] Regen global strings --- Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init_generated.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index c736bfecd077fd..03f6b90e28ee2b 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -451,6 +451,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(mapping) STRUCT_FOR_ID(match) STRUCT_FOR_ID(max_length) + STRUCT_FOR_ID(maxdigits) STRUCT_FOR_ID(maxevents) STRUCT_FOR_ID(maxmem) STRUCT_FOR_ID(maxsplit) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 58d9e934b96c19..65ab098c1a2f0d 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++
b/Include/internal/pycore_runtime_init_generated.h @@ -960,6 +960,7 @@ extern "C" { INIT_ID(mapping), \ INIT_ID(match), \ INIT_ID(max_length), \ + INIT_ID(maxdigits), \ INIT_ID(maxevents), \ INIT_ID(maxmem), \ INIT_ID(maxsplit), \ @@ -2224,6 +2225,8 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(max_length); PyUnicode_InternInPlace(&string); + string = &_Py_ID(maxdigits); + PyUnicode_InternInPlace(&string); string = &_Py_ID(maxevents); PyUnicode_InternInPlace(&string); string = &_Py_ID(maxmem); @@ -6373,6 +6376,10 @@ _PyStaticObjects_CheckRefcnt(void) { _PyObject_Dump((PyObject *)&_Py_ID(max_length)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); }; + if (Py_REFCNT((PyObject *)&_Py_ID(maxdigits)) < _PyObject_IMMORTAL_REFCNT) { + _PyObject_Dump((PyObject *)&_Py_ID(maxdigits)); + Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); + }; if (Py_REFCNT((PyObject *)&_Py_ID(maxevents)) < _PyObject_IMMORTAL_REFCNT) { _PyObject_Dump((PyObject *)&_Py_ID(maxevents)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); From 3c8504bd54c22ac12faa97523669758b95583f56 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 16:57:08 -0700 Subject: [PATCH 40/45] Make the doctest actually run & fix it. --- Doc/library/stdtypes.rst | 4 +++- .../Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 4d804ba94b8300..f69a9d36e6dbc0 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5493,6 +5493,8 @@ and the sign are not counted towards the limit. When an operation would exceed the limit, a :exc:`ValueError` is raised:: +.. doctest:: + >>> import sys >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. 
>>> _ = int('2' * 5432) @@ -5509,7 +5511,7 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised:: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. >>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i # Hexidecimal is unlimited. + >>> assert int(hex(i_squared), base=16) == i*i # Hexidecimal is unlimited. The default limit is 4300 digits as provided in :data:`sys.int_info.default_max_str_digits `. diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index a205fb31ad7b95..f9386b2ac140da 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -10,5 +10,5 @@ line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length limitation ` documentation. The default limit is 4300 digits in string form. -Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback from -Victor Stinner, Thomas Wouters, and Steve Dower. +Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback +from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. From 15864199e4f3bc596bcfd065ff3c6d334074ec5c Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 17:22:01 -0700 Subject: [PATCH 41/45] Fix the docs build. --- Doc/library/stdtypes.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index f69a9d36e6dbc0..b01c3438d879b5 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5491,7 +5491,7 @@ The limit is applied to the number of digit characters in the input or output string when a non-linear conversion algorithm would be involved. Underscores and the sign are not counted towards the limit. 
-When an operation would exceed the limit, a :exc:`ValueError` is raised:: +When an operation would exceed the limit, a :exc:`ValueError` is raised: .. doctest:: @@ -5518,7 +5518,9 @@ The default limit is 4300 digits as provided in The lowest limit that can be configured is 640 digits as provided in :data:`sys.int_info.str_digits_check_threshold <sys.int_info>`. -Verification:: +Verification: + +.. doctest:: >>> import sys >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info From 94bd3eefb52d61240e5ca19760486dcbd6681a1e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 21:47:15 -0700 Subject: [PATCH 42/45] Rename the news file to appease the Bedevere bot. --- ...010gps.rst => 2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Security/{2022-08-07-16-53.gh-issue-95778.ch010gps.rst => 2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst} (100%) diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst similarity index 100% rename from Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst rename to Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst From 0b91f6580a0cb732b081bc0553dae6e53eff7fce Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 22:33:52 -0700 Subject: [PATCH 43/45] Regen argument clinic after the rebase merge.
--- Python/clinic/sysmodule.c.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 6d6d93aa5f10a1..6864b8b0e03b2f 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -779,8 +779,31 @@ static PyObject * sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(maxdigits), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + static const char * const _keywords[] = {"maxdigits", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_str_digits", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_int_max_str_digits", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE PyObject *argsbuf[1]; int maxdigits; @@ -1320,4 +1343,4 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=27e4e7e2264ea035 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=15318cdd96b62b06 input=a9049054013a1b77]*/ From 02776f9574f5e7a12988e369640b6a3c395de7d3 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Fri, 2 Sep 2022 16:55:41 +0200 Subject: [PATCH 44/45] Hexi hexa Co-authored-by: Steve Dower --- Doc/library/stdtypes.rst | 6 +++--- Doc/whatsnew/3.12.rst | 2 +- Lib/test/test_ast.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) 
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b01c3438d879b5..163ac704138927 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5474,7 +5474,7 @@ Integer string conversion length limitation CPython has a global limit for converting between :class:`int` and :class:`str` to mitigate denial of service attacks. This limit *only* applies to decimal or -other non-power-of-two number bases. Hexidecimal, octal, and binary conversions +other non-power-of-two number bases. Hexadecimal, octal, and binary conversions are unlimited. The limit can be configured. The :class:`int` type in CPython is an abitrary length number stored in binary @@ -5511,7 +5511,7 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. >>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i*i # Hexidecimal is unlimited. + >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. The default limit is 4300 digits as provided in :data:`sys.int_info.default_max_str_digits `. @@ -5596,7 +5596,7 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: encounter an error during parsing, usually at startup time or import time or even at installation time - anytime an up to date ``.pyc`` does not already exist for the code. A workaround for source that contains such large - constants is to convert them to ``0x`` hexidecimal form as it has no limit. + constants is to convert them to ``0x`` hexadecimal form as it has no limit. Test your application thoroughly if you use a low limit. 
Ensure your tests run with the limit set early via the environment or flag so that it applies diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index e6dd66f3d2a6a7..70a1104127e9a8 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -84,7 +84,7 @@ Other Language Changes (Contributed by Serhiy Storchaka in :gh:`87995`.) * Converting between :class:`int` and :class:`str` in bases other than 2 - (binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) + (binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now raises a :exc:`ValueError` if the number of digits in string form is above a limit to avoid potential denial of service attacks due to the algorithmic complexity. This is a mitigation for `CVE-2020-10735 diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 942d67e8a2eaa2..68617b10e36f25 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1142,7 +1142,7 @@ def test_literal_eval_str_int_limit(self): with self.assertRaises(SyntaxError) as err_ctx: ast.literal_eval('3'*4001) self.assertIn('Exceeds the limit ', str(err_ctx.exception)) - self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) + self.assertIn(' Consider hexadecimal ', str(err_ctx.exception)) def test_literal_eval_complex(self): # Issue #4907 From 173fa4e707f95eedb0d0ea785de6722af92be262 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Fri, 2 Sep 2022 16:57:00 +0200 Subject: [PATCH 45/45] Hexi hexa 2 Co-authored-by: Steve Dower --- Lib/test/test_compile.py | 2 +- .../Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst | 2 +- Parser/pegen.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 9fb119259211a1..3ed57c2a5d2763 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -210,7 +210,7 @@ def test_int_literals_too_long(self): exc = err_ctx.exception self.assertEqual(exc.lineno, 3) self.assertIn('Exceeds the 
limit ', str(exc)) - self.assertIn(' Consider hexidecimal ', str(exc)) + self.assertIn(' Consider hexadecimal ', str(exc)) def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index f9386b2ac140da..ea3b85d632e083 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -1,5 +1,5 @@ Converting between :class:`int` and :class:`str` in bases other than 2 -(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) now +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now raises a :exc:`ValueError` if the number of digits in string form is above a limit to avoid potential denial of service attacks due to the algorithmic complexity. This is a mitigation for `CVE-2020-10735 diff --git a/Parser/pegen.c b/Parser/pegen.c index efcbcf2872b865..a5d123da51296c 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -663,7 +663,7 @@ _PyPegen_number_token(Parser *p) p, PyExc_SyntaxError, t->lineno, -1 /* col_offset */, t->end_lineno, -1 /* end_col_offset */, - "%S - Consider hexidecimal for huge integer literals " + "%S - Consider hexadecimal for huge integer literals " "to avoid decimal conversion limits.", value); Py_DECREF(value);