Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-95778: Add pre-check for int-to-str conversion #96537

Merged
merged 14 commits into from
Sep 4, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ extern "C" {
* everyone's existing deployed numpy test suite passes before
* https://github.com/numpy/numpy/issues/22098 is widely available.
*
* $ python -m timeit -s 's = * "1"*4300' 'int(s)'
* $ python -m timeit -s 's = "1"*4300' 'int(s)'
* 2000 loops, best of 5: 125 usec per loop
* $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)'
* $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)'
* 1000 loops, best of 5: 311 usec per loop
* (zen2 cloud VM)
*
Expand Down
82 changes: 82 additions & 0 deletions Lib/test/test_int.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
import time

import unittest
from test import support
Expand Down Expand Up @@ -632,6 +633,87 @@ def test_max_str_digits(self):
with self.assertRaises(ValueError):
str(i)

def test_denial_of_service_prevented_int_to_str(self):
    """Regression test: int -> str must be refused before the O(N**2) work.

    A permitted conversion is timed first to obtain a baseline.  Conversions
    that exceed the digit limit must then raise ValueError in less than one
    eighth of that baseline.  The test is skipped when the baseline is too
    short to measure.
    """
    current_limit = sys.get_int_max_str_digits()
    assert current_limit < 50_000, current_limit  # A test prerequisite.
    # Some platforms like WASM lack process_time(); use monotonic() there.
    clock = time.monotonic if time.process_time() <= 0 else time.process_time

    digits = 78_268
    huge_int = int(f'0x{"c"*65_000}', base=16)  # 78268 decimal digits.

    # Baseline: the conversion is allowed, so the full work is performed.
    with support.adjust_int_max_str_digits(digits):
        began = clock()
        as_decimal = str(huge_int)
    seconds_to_convert = clock() - began
    self.assertEqual(len(as_decimal), digits)
    # Ensure that we chose a slow enough conversion to measure.
    # It takes 0.1 seconds on a Zen based cloud VM in an opt build.
    if seconds_to_convert < 0.005:
        raise unittest.SkipTest('"slow" conversion took only '
                                f'{seconds_to_convert} seconds.')
    time_budget = seconds_to_convert/8

    # Test with the limit just below the size needed to check performance.
    # The performant limit check is slightly fuzzy, so give it some room.
    with support.adjust_int_max_str_digits(int(.995 * digits)):
        with self.assertRaises(ValueError) as caught:
            began = clock()
            str(huge_int)
        seconds_to_fail_huge = clock() - began
    self.assertIn('conversion', str(caught.exception))
    self.assertLess(seconds_to_fail_huge, time_budget)

    # Now check that a conversion that would take 30x as long also fails
    # in a similarly fast fashion.  No limit override is active here; the
    # prerequisite asserted above keeps the limit below this value's size.
    extra_huge_int = int(f'0x{"c"*500_000}', base=16)  # 602060 digits.
    with self.assertRaises(ValueError) as caught:
        began = clock()
        # If not limited, 8 seconds in the Zen based cloud VM.
        str(extra_huge_int)
    seconds_to_fail_extra_huge = clock() - began
    self.assertIn('conversion', str(caught.exception))
    self.assertLess(seconds_to_fail_extra_huge, time_budget)

def test_denial_of_service_prevented_str_to_int(self):
    """Regression test: str -> int must be refused before the O(N**2) work.

    A permitted conversion is timed first to obtain a baseline.  Conversions
    of strings longer than the digit limit must then raise ValueError in
    less than one eighth of that baseline.  The test is skipped when the
    baseline is too short to measure.
    """
    current_limit = sys.get_int_max_str_digits()
    assert current_limit < 100_000, current_limit  # A test prerequisite.
    # Some platforms like WASM lack process_time(); use monotonic() there.
    clock = time.monotonic if time.process_time() <= 0 else time.process_time

    digits = 133700
    huge = '8'*digits

    # Baseline: the conversion is allowed, so the full work is performed.
    with support.adjust_int_max_str_digits(digits):
        began = clock()
        int(huge)
    seconds_to_convert = clock() - began
    # Ensure that we chose a slow enough conversion to measure.
    # It takes 0.1 seconds on a Zen based cloud VM in an opt build.
    if seconds_to_convert < 0.005:
        raise unittest.SkipTest('"slow" conversion took only '
                                f'{seconds_to_convert} seconds.')
    time_budget = seconds_to_convert/8

    # One digit below the string's length is enough to trigger the limit.
    with support.adjust_int_max_str_digits(digits - 1):
        with self.assertRaises(ValueError) as caught:
            began = clock()
            int(huge)
        seconds_to_fail_huge = clock() - began
    self.assertIn('conversion', str(caught.exception))
    self.assertLess(seconds_to_fail_huge, time_budget)

    # Now check that a conversion that would take 30x as long also fails
    # in a similarly fast fashion.  No limit override is active here; the
    # prerequisite asserted above keeps the limit below this string's length.
    extra_huge = '7'*1_200_000
    with self.assertRaises(ValueError) as caught:
        began = clock()
        # If not limited, 8 seconds in the Zen based cloud VM.
        int(extra_huge)
    seconds_to_fail_extra_huge = clock() - began
    self.assertIn('conversion', str(caught.exception))
    self.assertLess(seconds_to_fail_extra_huge, time_budget)

def test_power_of_two_bases_unlimited(self):
"""The limit does not apply to power of 2 bases."""
maxdigits = sys.get_int_max_str_digits()
Expand Down
26 changes: 22 additions & 4 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ medium_value(PyLongObject *x)
#define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS)
#define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS)

#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion"

static inline void
_Py_DECREF_INT(PyLongObject *op)
Expand Down Expand Up @@ -1758,6 +1759,23 @@ long_to_decimal_string_internal(PyObject *aa,
size_a = Py_ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;

/* quick and dirty pre-check for overflowing the decimal digit limit,
based on the inequality 10/3 >= log2(10)

explanation in https://github.com/python/cpython/pull/96537
*/
if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD
/ (3 * PyLong_SHIFT) + 2) {
PyInterpreterState *interp = _PyInterpreterState_GET();
int max_str_digits = interp->int_max_str_digits;
if ((max_str_digits > 0) &&
(max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) {
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
max_str_digits);
return -1;
}
}

/* quick and dirty upper bound for the number of digits
required to express a in base _PyLong_DECIMAL_BASE:

Expand Down Expand Up @@ -1823,8 +1841,8 @@ long_to_decimal_string_internal(PyObject *aa,
Py_ssize_t strlen_nosign = strlen - negative;
if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
Py_DECREF(scratch);
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
max_str_digits, strlen_nosign);
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
max_str_digits);
return -1;
}
}
Expand Down Expand Up @@ -2498,7 +2516,7 @@ digit beyond the first.
PyInterpreterState *interp = _PyInterpreterState_GET();
int max_str_digits = interp->int_max_str_digits;
if ((max_str_digits > 0) && (digits > max_str_digits)) {
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT,
max_str_digits, digits);
return NULL;
}
Expand Down