From c86821e5aa25f435d712ba1b3dfdbd75fa93d992 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 02:25:34 +0000 Subject: [PATCH 01/17] gh-111140: Adds PyLong_AsByteArray functions --- Include/cpython/longobject.h | 40 +++++- Lib/test/test_capi/test_long.py | 93 ++++++++++++ Modules/_io/textio.c | 2 +- Modules/_pickle.c | 3 +- Modules/_randommodule.c | 3 +- Modules/_sqlite/util.c | 2 +- Modules/_struct.c | 20 +-- Modules/_testcapi/long.c | 75 ++++++++++ Modules/_tkinter.c | 3 +- Modules/cjkcodecs/multibytecodec.c | 6 +- Objects/longobject.c | 221 ++++++++++++++++++++++++++++- 11 files changed, 445 insertions(+), 23 deletions(-) diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index fd1be29ed397d1..65c252d6c47f2a 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,6 +4,44 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); +/* PyLong_AsByteArray: Copy the integer value to a native address. + n is the number of bytes available in the buffer. + Uses the current build's default endianness. + PyLong_AsByteArray ensures the MSB matches the int's sign. + PyLong_AsUnsignedByteArray allows the MSB to be set for a + positive Python value provided no significant bits are lost + (that is, all higher bits are a sign extension of the MSB). + Negative values still sign extend to fill the buffer and are + guaranteed to have MSB set. Use PyLong_AsByteArray to guarantee + that the MSB is set iff the int was negative. + + Returns 0 on success or -1 with an exception set, but if the buffer is + not big enough, returns the desired buffer size without setting an + exception. Note that the desired size may be larger than strictly + necessary to avoid precise calculations. */ +PyAPI_FUNC(int) PyLong_AsByteArray(PyObject* v, void* buffer, size_t n); +PyAPI_FUNC(int) PyLong_AsUnsignedByteArray(PyObject* v, void* buffer, size_t n); +PyAPI_FUNC(int) PyLong_AsByteArrayWithOptions(PyObject* v, void* buffer, + size_t n, int options); + +#define PYLONG_ASBYTEARRAY_NATIVE_ENDIAN 0x00 +#define PYLONG_ASBYTEARRAY_LITTLE_ENDIAN 0x01 +#define PYLONG_ASBYTEARRAY_BIG_ENDIAN 0x02 + +#define PYLONG_ASBYTEARRAY_SIGNED 0x00 +#define PYLONG_ASBYTEARRAY_UNSIGNED 0x04 + +/* PyLong_FromByteArray: Create an integer value containing the number from + a native buffer. + n is the number of bytes to read from the buffer. + Uses the current build's default endianness, and assumes the value was + sign extended to 'n' bytes. + PyLong_FromUnsignedByteArray assumes the value was zero extended. + + Returns the int object, or NULL with an exception set. */ +PyAPI_FUNC(PyObject) PyLong_FromByteArray(void* buffer, size_t n); +PyAPI_FUNC(PyObject) PyLong_FromUnsignedByteArray(void* buffer, size_t n); + PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); @@ -50,7 +88,7 @@ PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( */ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, unsigned char* bytes, size_t n, - int little_endian, int is_signed); + int little_endian, int is_signed, int with_exceptions); /* For use by the gcd function in mathmodule.c */ PyAPI_FUNC(PyObject *) _PyLong_GCD(PyObject *, PyObject *); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 8e3ef25d1ff86f..5c73a1968e1c32 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -1,5 +1,6 @@ import unittest import sys +import test.support as support from test.support import import_helper @@ -423,6 +424,98 @@ def test_long_asvoidptr(self): self.assertRaises(OverflowError, asvoidptr, -2**1000) # CRASHES asvoidptr(NULL) + def test_long_asbytearray(self): + import math + from _testcapi import ( + pylong_asbytearray as asbytearray, + pylong_asunsignedbytearray as asunsignedbytearray, + pylong_asbytearraywithoptions as asbytearraywithoptions, + SIZE_MAX + ) + + def log2(x): + return math.log(x) / math.log(2) + + SIZEOF_SIZE = int(math.ceil(log2(SIZE_MAX + 1)) / 8) + MAX_SSIZE = 2 ** (SIZEOF_SIZE * 8 - 1) - 1 + MAX_USIZE = 2 ** (SIZEOF_SIZE * 8) - 1 + if support.verbose: + print(f"{SIZEOF_SIZE=}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") + + for v, expect_u, expect_s in [ + (0, SIZEOF_SIZE, SIZEOF_SIZE), + (512, SIZEOF_SIZE, SIZEOF_SIZE), + (-512, SIZEOF_SIZE, SIZEOF_SIZE), + (MAX_SSIZE, SIZEOF_SIZE, SIZEOF_SIZE), + (MAX_USIZE, SIZEOF_SIZE, SIZEOF_SIZE + 1), + (-MAX_SSIZE, SIZEOF_SIZE, SIZEOF_SIZE), + (-MAX_USIZE, SIZEOF_SIZE, SIZEOF_SIZE + 1), + ]: + with self.subTest(f"sizeof-{v:X}"): + buffer = bytearray(1) + self.assertEqual(expect_u, asunsignedbytearray(v, buffer, 0), + "PyLong_AsUnsignedByteArray(v, NULL, 0)") + self.assertEqual(expect_s, asbytearray(v, buffer, 0), + "PyLong_AsByteArray(v, NULL, 0)") + self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, 0x05), + "PyLong_AsByteArrayWithOptions(v, NULL, 0, unsigned|little)") + self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, 0x06), + "PyLong_AsByteArrayWithOptions(v, NULL, 0, unsigned|big)") + self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, 0x01), + "PyLong_AsByteArrayWithOptions(v, NULL, 0, signed|little)") + self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, 0x02), + "PyLong_AsByteArrayWithOptions(v, NULL, 0, signed|big)") + + for v, expect_be, signed_fails in [ + (0, b'\x00', False), + (0, b'\x00' * 2, False), + (0, b'\x00' * 8, False), + (1, b'\x01', False), + (1, b'\x00' * 10 + b'\x01', False), + (42, b'\x2a', False), + (42, b'\x00' * 10 + b'\x2a', False), + (-1, b'\xff', False), + (-42, b'\xd6', False), + (-42, b'\xff' * 10 + b'\xd6', False), + # Only unsigned will extract 255 into a single byte + (255, b'\xff', True), + (255, b'\x00\xff', False), + (256, b'\x01\x00', False), + (2**63, b'\x80\x00\x00\x00\x00\x00\x00\x00', True), + (-2**63, b'\x80\x00\x00\x00\x00\x00\x00\x00', False), + (2**63, b'\x00\x80\x00\x00\x00\x00\x00\x00\x00', False), + (-2**63, b'\xFF\x80\x00\x00\x00\x00\x00\x00\x00', False), + ]: + with self.subTest(f"{v:X}-{len(expect_be)}bytes"): + n = len(expect_be) + buffer = bytearray(n) + expect_le = expect_be[::-1] + + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x05), + f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, unsigned+little)") + self.assertEqual(expect_le, buffer[:n], "unsigned+little") + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x06), + f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, unsigned+big)") + self.assertEqual(expect_be, buffer[:n], "unsigned+big") + + if signed_fails: + self.assertEqual( + max(n + 1, SIZEOF_SIZE), + asbytearraywithoptions(v, buffer, n, 0x01), + f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+little)", + ) + self.assertEqual( + max(n + 1, SIZEOF_SIZE), + asbytearraywithoptions(v, buffer, n, 0x02), + f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+big)", + ) + else: + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x01), + f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+little)") + self.assertEqual(expect_le, buffer[:n], "signed+little") + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x02), + f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+big)") + self.assertEqual(expect_be, buffer[:n], "signed+big") if __name__ == "__main__": unittest.main() diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d794af8de2b8f0..a3239ec0f52960 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2393,7 +2393,7 @@ textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) return -1; if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), - PY_LITTLE_ENDIAN, 0) < 0) { + PY_LITTLE_ENDIAN, 0, 1) < 0) { Py_DECREF(cookieLong); return -1; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index f210c0ca205991..0d83261168185d 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2162,7 +2162,8 @@ save_long(PicklerObject *self, PyObject *obj) pdata = (unsigned char *)PyBytes_AS_STRING(repr); i = _PyLong_AsByteArray((PyLongObject *)obj, pdata, nbytes, - 1 /* little endian */ , 1 /* signed */ ); + 1 /* little endian */ , 1 /* signed */ , + 1 /* with exceptions */); if (i < 0) goto error; /* If the int is negative, this may be a byte more than diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index 4403e1d132c057..e0513d758032d2 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -342,7 +342,8 @@ random_seed(RandomObject *self, PyObject *arg) res = _PyLong_AsByteArray((PyLongObject *)n, (unsigned char *)key, keyused * 4, PY_LITTLE_ENDIAN, - 0); /* unsigned */ + 0, /* unsigned */ + 1); /* with exceptions */ if (res == -1) { goto Done; } diff --git a/Modules/_sqlite/util.c b/Modules/_sqlite/util.c index 833a666301d8ff..9e8613ef67916e 100644 --- a/Modules/_sqlite/util.c +++ b/Modules/_sqlite/util.c @@ -162,7 +162,7 @@ _pysqlite_long_as_int64(PyObject * py_val) sqlite_int64 int64val; if (_PyLong_AsByteArray((PyLongObject *)py_val, (unsigned char *)&int64val, sizeof(int64val), - IS_LITTLE_ENDIAN, 1 /* signed */) >= 0) { + IS_LITTLE_ENDIAN, 1 /* signed */, 0) >= 0) { return int64val; } } diff --git a/Modules/_struct.c b/Modules/_struct.c index bd16fa89f18945..fa2cd37e003e0a 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1000,9 +1000,10 @@ bp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f) (unsigned char *)p, 8, 0, /* little_endian */ - 1 /* signed */); + 1, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires %lld <= number <= %lld", f->format, @@ -1024,9 +1025,10 @@ bp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f (unsigned char *)p, 8, 0, /* little_endian */ - 0 /* signed */); + 0, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires 0 <= number <= %llu", f->format, @@ -1260,9 +1262,10 @@ lp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f) (unsigned char *)p, 8, 1, /* little_endian */ - 1 /* signed */); + 1, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires %lld <= number <= %lld", f->format, @@ -1284,9 +1287,10 @@ lp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f (unsigned char *)p, 8, 1, /* little_endian */ - 0 /* signed */); + 0, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires 0 <= number <= %llu", f->format, diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 32ad8d32ab8523..b6d46364486c87 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -776,6 +776,78 @@ pylong_asvoidptr(PyObject *module, PyObject *arg) return Py_NewRef((PyObject *)value); } +static PyObject * +pylong_asbytearray(PyObject *module, PyObject *args) +{ + PyObject *v; + Py_buffer buffer; + Py_ssize_t n; + if (!PyArg_ParseTuple(args, "Ow*n", &v, &buffer, &n)) { + return NULL; + } + if (buffer.readonly) { + PyErr_SetString(PyExc_TypeError, "buffer must be writable"); + PyBuffer_Release(&buffer); + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + int res = PyLong_AsByteArray(v, buffer.buf, n); + PyBuffer_Release(&buffer); + return res >= 0 ? PyLong_FromLong(res) : NULL; +} + +static PyObject * +pylong_asunsignedbytearray(PyObject *module, PyObject *args) +{ + PyObject *v; + Py_buffer buffer; + Py_ssize_t n; + if (!PyArg_ParseTuple(args, "Ow*n", &v, &buffer, &n)) { + return NULL; + } + if (buffer.readonly) { + PyErr_SetString(PyExc_TypeError, "buffer must be writable"); + PyBuffer_Release(&buffer); + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + int res = PyLong_AsUnsignedByteArray(v, buffer.buf, n); + PyBuffer_Release(&buffer); + return res >= 0 ? PyLong_FromLong(res) : NULL; +} + +static PyObject * +pylong_asbytearraywithoptions(PyObject *module, PyObject *args) +{ + PyObject *v; + Py_buffer buffer; + Py_ssize_t n, options; + if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &options)) { + return NULL; + } + if (buffer.readonly) { + PyErr_SetString(PyExc_TypeError, "buffer must be writable"); + PyBuffer_Release(&buffer); + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + int res = PyLong_AsByteArrayWithOptions(v, buffer.buf, n, (int)options); + PyBuffer_Release(&buffer); + return res >= 0 ? PyLong_FromLong(res) : NULL; +} + static PyMethodDef test_methods[] = { _TESTCAPI_TEST_LONG_AND_OVERFLOW_METHODDEF _TESTCAPI_TEST_LONG_API_METHODDEF @@ -804,6 +876,9 @@ static PyMethodDef test_methods[] = { {"pylong_as_size_t", pylong_as_size_t, METH_O}, {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, + {"pylong_asbytearray", pylong_asbytearray, METH_VARARGS}, + {"pylong_asunsignedbytearray", pylong_asunsignedbytearray, METH_VARARGS}, + {"pylong_asbytearraywithoptions", pylong_asbytearraywithoptions, METH_VARARGS}, {NULL}, }; diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index f6181168a85ae1..e3789867dc085f 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -926,7 +926,8 @@ AsObj(PyObject *value) (unsigned char *)(void *)&wideValue, sizeof(wideValue), PY_LITTLE_ENDIAN, - /* signed */ 1) == 0) { + /* signed */ 1, + /* with_exceptions */ 1) == 0) { return Tcl_NewWideIntObj(wideValue); } PyErr_Clear(); diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 5d3c16a98423ba..2125da437963d2 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -973,7 +973,8 @@ _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEn if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1 /* little-endian */ , - 0 /* unsigned */ ) < 0) { + 0 /* unsigned */ , + 1 /* with_exceptions */) < 0) { goto errorexit; } @@ -1255,7 +1256,8 @@ _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDe if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1 /* little-endian */ , - 0 /* unsigned */ ) < 0) { + 0 /* unsigned */ , + 1 /* with_exceptions */) < 0) { return NULL; } diff --git a/Objects/longobject.c b/Objects/longobject.c index e655ba19e8f1c1..798595b4cdcb9a 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -928,7 +928,8 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, int _PyLong_AsByteArray(PyLongObject* v, unsigned char* bytes, size_t n, - int little_endian, int is_signed) + int little_endian, int is_signed, + int with_exceptions) { Py_ssize_t i; /* index into v->long_value.ob_digit */ Py_ssize_t ndigits; /* number of digits */ @@ -945,8 +946,10 @@ _PyLong_AsByteArray(PyLongObject* v, ndigits = _PyLong_DigitCount(v); if (_PyLong_IsNegative(v)) { if (!is_signed) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative int to unsigned"); + if (with_exceptions) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative int to unsigned"); + } return -1; } do_twos_comp = 1; @@ -1052,11 +1055,215 @@ _PyLong_AsByteArray(PyLongObject* v, return 0; Overflow: - PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + if (with_exceptions) { + PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + } return -1; } +// Refactored out for readability, not reuse +static inline int +_fits_in_n_bits(Py_ssize_t v, Py_ssize_t n, int require_sign_bit) +{ + if (n > sizeof(Py_ssize_t) * 8) { + return 1; + } + // If all bits above n are the same, we fit. + // (Use n-1 if we require the sign bit to be consistent.) + Py_ssize_t v_extended = v >> ((int)n - (require_sign_bit ? 1 : 0)); + return v_extended == 0 || v_extended == -1; +} + +int +PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) +{ + PyLongObject *v; + union { + Py_ssize_t v; + unsigned char b[sizeof(Py_ssize_t)]; + } cv; + int do_decref = 0; + int res = 0; + int signed_ = !(options & PYLONG_ASBYTEARRAY_UNSIGNED); + int little_endian = PY_LITTLE_ENDIAN; + switch (options & (PYLONG_ASBYTEARRAY_LITTLE_ENDIAN | PYLONG_ASBYTEARRAY_BIG_ENDIAN)) { + case 0: + break; + case PYLONG_ASBYTEARRAY_BIG_ENDIAN: + little_endian = 0; + break; + case PYLONG_ASBYTEARRAY_LITTLE_ENDIAN: + little_endian = 1; + break; + default: + PyErr_SetString(PyExc_ValueError, "invalid 'options' value"); + return -1; + } + + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + + if (PyLong_Check(vv)) { + v = (PyLongObject *)vv; + } + else { + v = (PyLongObject *)_PyNumber_Index(vv); + if (v == NULL) { + return -1; + } + do_decref = 1; + } + + if (_PyLong_IsCompact(v)) { + res = 0; + cv.v = _PyLong_CompactValue(v); + if (n <= 0) { + /* Caller only requested the size, so we'll say we need + * Py_ssize_t bytes */ + res = sizeof(cv.b); + } + else if (n == sizeof(cv.b)) { + memcpy(buffer, cv.b, n); + } + else if (n < sizeof(cv.b)) { + if (!_fits_in_n_bits(cv.v, n * 8, signed_)) { + /* Value is too big for 'n' bytes, so we'll say we need + * at least Py_ssize_t bytes */ + res = sizeof(cv.b); + } +#if PY_LITTLE_ENDIAN + else if (little_endian) { + memcpy(buffer, cv.b, n); + } + else { + for (size_t i = 0; i < n; ++i) { + ((unsigned char*)buffer)[n - i - 1] = cv.b[i]; + } + } +#else + else if (little_endian) { + for (size_t i = 0; i < n; ++i) { + ((unsigned char*)buffer)[i] = cv.b[sizeof(cv.b) - i - 1]; + } + } + else { + memcpy(buffer, &cv.b[sizeof(cv.b) - n], n); + } +#endif + } + else { + unsigned char fill = cv.v < 0 ? 0xFF : 0x00; +#if PY_LITTLE_ENDIAN + if (little_endian) { + memcpy(buffer, cv.b, sizeof(cv.b)); + memset((char *)buffer + sizeof(cv.b), fill, n - sizeof(cv.b)); + } + else { + unsigned char *b = (unsigned char *)buffer; + for (size_t i = 0; i < n - sizeof(cv.b); ++i) { + *b++ = fill; + } + for (size_t i = sizeof(cv.b); i > 0; --i) { + *b++ = cv.b[i - 1]; + } + } +#else + if (little_endian) { + unsigned char *b = (unsigned char *)buffer; + for (size_t i = sizeof(cv.b); i > 0; --i) { + *b++ = cv.b[i - 1]; + } + for (size_t i = 0; i < n - sizeof(cv.b); ++i) { + *b++ = fill; + } + } + else { + memset(buffer, fill, n - sizeof(cv.b)); + memcpy((char *)buffer + n - sizeof(cv.b), cv.b, n); + } +#endif + } + } + else if (n <= 0) { + res = -1; + } + else if (!signed_ && _PyLong_IsNegative(v)) { + // This case is not supported by _PyLong_AsByteArray, so we extract a + // signed value, to a larger buffer first if needed. If we use a larger + // buffer, the extra data should be all bits set. If it is, then we can + // ignore it and return the number of bytes the caller requested + // (that's what the "unsigned" means). Otherwise, we need to return the + // actual number of bytes required. + res = _PyLong_AsByteArray(v, buffer, n, little_endian, 1, 0); + if (res < 0) { + unsigned char *b = (unsigned char *)PyMem_Malloc(n + 1); + if (!b) { + res = -1; + goto error; + } + res = _PyLong_AsByteArray(v, b, n + 1, little_endian, 1, 0); + if (res == 0) { + // Ensure the extra byte is 0xFF + if (little_endian) { + if (b[n - 1] != 0xFF) { + res = -1; + } else { + memcpy(buffer, b, n); + } + } else { + if (b[0] != 0xFF) { + res = -1; + } else { + memcpy(buffer, &b[1], n); + } + } + } + PyMem_Free(b); + } + } + else { + res = _PyLong_AsByteArray(v, buffer, n, little_endian, signed_, 0); + } + + // NOTE: res should only be <0 if a _PyLong_AsByteArray has failed without + // setting an exception, or we're requesting the size of a non-compact int. + // If you add another reason, you should update this! + if (res < 0) { + // More efficient calculation for number of bytes required? + size_t nb = _PyLong_NumBits((PyObject *)v) + (signed_ ? 1 : 0); + size_t n_needed = ((nb - 1) / 8) + 1; + res = (int)n_needed; + if (res != n_needed) { + PyErr_SetString(PyExc_OverflowError, + "value too large to convert"); + res = -1; + } + } + +error: + if (do_decref) { + Py_DECREF(v); + } + + return res; +} + +static int +PyLong_AsByteArray(PyObject* vv, void* buffer, size_t n) +{ + return PyLong_AsByteArrayWithOptions(vv, buffer, n, PYLONG_ASBYTEARRAY_SIGNED); +} + +static int +PyLong_AsUnsignedByteArray(PyObject* vv, void* buffer, size_t n) +{ + return PyLong_AsByteArrayWithOptions(vv, buffer, n, PYLONG_ASBYTEARRAY_UNSIGNED); +} + + /* Create a new int object from a C pointer */ PyObject * @@ -1231,7 +1438,7 @@ PyLong_AsLongLong(PyObject *vv) } else { res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1, 1); } if (do_decref) { Py_DECREF(v); @@ -1270,7 +1477,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv) } else { res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0, 1); } /* Plan 9 can't handle long long in ? : expressions */ @@ -6068,7 +6275,7 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, if (_PyLong_AsByteArray((PyLongObject *)self, (unsigned char *)PyBytes_AS_STRING(bytes), - length, little_endian, is_signed) < 0) { + length, little_endian, is_signed, 1) < 0) { Py_DECREF(bytes); return NULL; } From cd6754a40677e697035b7ebde91d7f85e7873a00 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 12:27:29 +0000 Subject: [PATCH 02/17] Statics --- Objects/longobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 798595b4cdcb9a..65cc22d24e8acf 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1066,7 +1066,7 @@ _PyLong_AsByteArray(PyLongObject* v, static inline int _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n, int require_sign_bit) { - if (n > sizeof(Py_ssize_t) * 8) { + if (n > (Py_ssize_t)sizeof(Py_ssize_t) * 8) { return 1; } // If all bits above n are the same, we fit. @@ -1251,13 +1251,13 @@ PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) return res; } -static int +int PyLong_AsByteArray(PyObject* vv, void* buffer, size_t n) { return PyLong_AsByteArrayWithOptions(vv, buffer, n, PYLONG_ASBYTEARRAY_SIGNED); } -static int +int PyLong_AsUnsignedByteArray(PyObject* vv, void* buffer, size_t n) { return PyLong_AsByteArrayWithOptions(vv, buffer, n, PYLONG_ASBYTEARRAY_UNSIGNED); From bf86f851fe4e2f05efeffb94d786be4b26fb8d44 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 13:38:45 +0000 Subject: [PATCH 03/17] Better options handling and tests --- Include/cpython/longobject.h | 15 +++--- Lib/test/test_capi/test_long.py | 63 +++++++++++++++++----- Modules/_testcapi/long.c | 6 ++- Objects/longobject.c | 95 +++++++++++++++++++++++---------- 4 files changed, 131 insertions(+), 48 deletions(-) diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 65c252d6c47f2a..87f172db6d66aa 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -24,19 +24,22 @@ PyAPI_FUNC(int) PyLong_AsUnsignedByteArray(PyObject* v, void* buffer, size_t n); PyAPI_FUNC(int) PyLong_AsByteArrayWithOptions(PyObject* v, void* buffer, size_t n, int options); -#define PYLONG_ASBYTEARRAY_NATIVE_ENDIAN 0x00 -#define PYLONG_ASBYTEARRAY_LITTLE_ENDIAN 0x01 -#define PYLONG_ASBYTEARRAY_BIG_ENDIAN 0x02 +/* Deliberately avoiding values 0 and 1 to avoid letting people + accidentally pass PY_LITTLE_ENDIAN as a constant. */ +#define PYLONG_ASBYTEARRAY_LITTLE_ENDIAN 0x04 +#define PYLONG_ASBYTEARRAY_BIG_ENDIAN 0x08 +#define PYLONG_ASBYTEARRAY_NATIVE_ENDIAN (0x04|0x08) -#define PYLONG_ASBYTEARRAY_SIGNED 0x00 -#define PYLONG_ASBYTEARRAY_UNSIGNED 0x04 +#define PYLONG_ASBYTEARRAY_SIGNED 0x10 +#define PYLONG_ASBYTEARRAY_UNSIGNED 0x20 /* PyLong_FromByteArray: Create an integer value containing the number from a native buffer. n is the number of bytes to read from the buffer. Uses the current build's default endianness, and assumes the value was sign extended to 'n' bytes. - PyLong_FromUnsignedByteArray assumes the value was zero extended. + PyLong_FromUnsignedByteArray assumes the value was zero extended, and + even if the MSB is set the resulting int will be positive. Returns the int object, or NULL with an exception set. */ PyAPI_FUNC(PyObject) PyLong_FromByteArray(void* buffer, size_t n); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 5c73a1968e1c32..bbabdb69b10689 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -430,7 +430,12 @@ def test_long_asbytearray(self): pylong_asbytearray as asbytearray, pylong_asunsignedbytearray as asunsignedbytearray, pylong_asbytearraywithoptions as asbytearraywithoptions, - SIZE_MAX + SIZE_MAX, + PYLONG_ASBYTEARRAY_NATIVE_ENDIAN, + PYLONG_ASBYTEARRAY_LITTLE_ENDIAN, + PYLONG_ASBYTEARRAY_BIG_ENDIAN, + PYLONG_ASBYTEARRAY_SIGNED, + PYLONG_ASBYTEARRAY_UNSIGNED, ) def log2(x): @@ -442,6 +447,19 @@ def log2(x): if support.verbose: print(f"{SIZEOF_SIZE=}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") + U_LE = PYLONG_ASBYTEARRAY_UNSIGNED | PYLONG_ASBYTEARRAY_LITTLE_ENDIAN + U_BE = PYLONG_ASBYTEARRAY_UNSIGNED | PYLONG_ASBYTEARRAY_BIG_ENDIAN + S_LE = PYLONG_ASBYTEARRAY_SIGNED | PYLONG_ASBYTEARRAY_LITTLE_ENDIAN + S_BE = PYLONG_ASBYTEARRAY_SIGNED | PYLONG_ASBYTEARRAY_BIG_ENDIAN + + # Ensure options of 0 and 1 raise + with self.assertRaises(SystemError): + asbytearraywithoptions(0, bytearray(1), 0, 0) + with self.assertRaises(SystemError): + asbytearraywithoptions(0, bytearray(1), 0, 1) + + # These tests request the required buffer size for both unsigned and + # signed options. for v, expect_u, expect_s in [ (0, SIZEOF_SIZE, SIZEOF_SIZE), (512, SIZEOF_SIZE, SIZEOF_SIZE), @@ -450,6 +468,10 @@ def log2(x): (MAX_USIZE, SIZEOF_SIZE, SIZEOF_SIZE + 1), (-MAX_SSIZE, SIZEOF_SIZE, SIZEOF_SIZE), (-MAX_USIZE, SIZEOF_SIZE, SIZEOF_SIZE + 1), + (2**255-1, 32, 32), + (-(2**255-1), 32, 32), + (2**256-1, 32, 33), + (-(2**256-1), 32, 33), ]: with self.subTest(f"sizeof-{v:X}"): buffer = bytearray(1) @@ -457,15 +479,17 @@ def log2(x): "PyLong_AsUnsignedByteArray(v, NULL, 0)") self.assertEqual(expect_s, asbytearray(v, buffer, 0), "PyLong_AsByteArray(v, NULL, 0)") - self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, 0x05), + self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, U_LE), "PyLong_AsByteArrayWithOptions(v, NULL, 0, unsigned|little)") - self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, 0x06), + self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, U_BE), "PyLong_AsByteArrayWithOptions(v, NULL, 0, unsigned|big)") - self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, 0x01), + self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, S_LE), "PyLong_AsByteArrayWithOptions(v, NULL, 0, signed|little)") - self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, 0x02), + self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, S_BE), "PyLong_AsByteArrayWithOptions(v, NULL, 0, signed|big)") + # We request as many bytes as `expect_be` contains, so tests for the + # same value may test different things with a longer expected result. for v, expect_be, signed_fails in [ (0, b'\x00', False), (0, b'\x00' * 2, False), @@ -475,6 +499,7 @@ def log2(x): (42, b'\x2a', False), (42, b'\x00' * 10 + b'\x2a', False), (-1, b'\xff', False), + (-1, b'\xff' * 10, False), (-42, b'\xd6', False), (-42, b'\xff' * 10 + b'\xd6', False), # Only unsigned will extract 255 into a single byte @@ -484,36 +509,50 @@ def log2(x): (2**63, b'\x80\x00\x00\x00\x00\x00\x00\x00', True), (-2**63, b'\x80\x00\x00\x00\x00\x00\x00\x00', False), (2**63, b'\x00\x80\x00\x00\x00\x00\x00\x00\x00', False), - (-2**63, b'\xFF\x80\x00\x00\x00\x00\x00\x00\x00', False), + (-2**63, b'\xff\x80\x00\x00\x00\x00\x00\x00\x00', False), + (2**255-1, b'\x7f' + b'\xff' * 31, False), + (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', False), + (-(2**255-1), b'\xff\x80' + b'\x00' * 30 + b'\x01', False), + # Cannot extract 256 bits into 32 bytes as signed ... + (2**256-1, b'\xff' * 32, True), + # ... but can extract into 33 bytes (zero extended) + (2**256-1, b'\x00' + b'\xff' * 32, False), + + # Unsigned extract of this negative number to 32 bytes will succeed, + # sacrificing the sign bit and overall magnitude, just like a C + # cast (int256_t)0x1_0000_..._0001 would. But signed extract fails + (-(2**256-1), b'\x00' * 31 + b'\x01', True), + # Extract to 33 bytes preserves the top-most bits in all cases. + (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', False), ]: with self.subTest(f"{v:X}-{len(expect_be)}bytes"): n = len(expect_be) buffer = bytearray(n) expect_le = expect_be[::-1] - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x05), + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, U_LE), f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, unsigned+little)") self.assertEqual(expect_le, buffer[:n], "unsigned+little") - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x06), + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, U_BE), f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, unsigned+big)") self.assertEqual(expect_be, buffer[:n], "unsigned+big") if signed_fails: self.assertEqual( max(n + 1, SIZEOF_SIZE), - asbytearraywithoptions(v, buffer, n, 0x01), + asbytearraywithoptions(v, buffer, n, S_LE), f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+little)", ) self.assertEqual( max(n + 1, SIZEOF_SIZE), - asbytearraywithoptions(v, buffer, n, 0x02), + asbytearraywithoptions(v, buffer, n, S_BE), f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+big)", ) else: - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x01), + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, S_LE), f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+little)") self.assertEqual(expect_le, buffer[:n], "signed+little") - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, 0x02), + self.assertEqual(0, asbytearraywithoptions(v, buffer, n, S_BE), f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+big)") self.assertEqual(expect_be, buffer[:n], "signed+big") diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index b6d46364486c87..94515338b39478 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -888,6 +888,10 @@ _PyTestCapi_Init_Long(PyObject *mod) if (PyModule_AddFunctions(mod, test_methods) < 0) { return -1; } - + if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_NATIVE_ENDIAN)) return -1; + if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_LITTLE_ENDIAN)) return -1; + if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_BIG_ENDIAN)) return -1; + if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_SIGNED)) return -1; + if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_UNSIGNED)) return -1; return 0; } diff --git a/Objects/longobject.c b/Objects/longobject.c index 65cc22d24e8acf..7df231782f1ce6 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1075,8 +1075,11 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n, int require_sign_bit) return v_extended == 0 || v_extended == -1; } -int -PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) +// Private function means we can add more options safely. +// The public API parses a flags parameter to extract the requested settings. +static int +_PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, + int signed_, int little_endian) { PyLongObject *v; union { @@ -1085,21 +1088,6 @@ PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) } cv; int do_decref = 0; int res = 0; - int signed_ = !(options & PYLONG_ASBYTEARRAY_UNSIGNED); - int little_endian = PY_LITTLE_ENDIAN; - switch (options & (PYLONG_ASBYTEARRAY_LITTLE_ENDIAN | PYLONG_ASBYTEARRAY_BIG_ENDIAN)) { - case 0: - break; - case PYLONG_ASBYTEARRAY_BIG_ENDIAN: - little_endian = 0; - break; - case PYLONG_ASBYTEARRAY_LITTLE_ENDIAN: - little_endian = 1; - break; - default: - PyErr_SetString(PyExc_ValueError, "invalid 'options' value"); - return -1; - } if (vv == NULL) { PyErr_BadInternalCall(); @@ -1191,12 +1179,12 @@ PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) res = -1; } else if (!signed_ && _PyLong_IsNegative(v)) { - // This case is not supported by _PyLong_AsByteArray, so we extract a - // signed value, to a larger buffer first if needed. If we use a larger - // buffer, the extra data should be all bits set. If it is, then we can - // ignore it and return the number of bytes the caller requested - // (that's what the "unsigned" means). Otherwise, we need to return the - // actual number of bytes required. + /* This case is not supported by _PyLong_AsByteArray, so we extract a + * signed value, to a larger buffer first if needed. If we use a larger + * buffer, the extra data should be all bits set. If it is, then we can + * ignore it and return the number of bytes the caller requested + * (that's what the "unsigned" means). Otherwise, we need to return the + * actual number of bytes required. */ res = _PyLong_AsByteArray(v, buffer, n, little_endian, 1, 0); if (res < 0) { unsigned char *b = (unsigned char *)PyMem_Malloc(n + 1); @@ -1208,7 +1196,8 @@ PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) if (res == 0) { // Ensure the extra byte is 0xFF if (little_endian) { - if (b[n - 1] != 0xFF) { + // (remember, we allocated n+1 bytes) + if (b[n] != 0xFF) { res = -1; } else { memcpy(buffer, b, n); @@ -1228,9 +1217,9 @@ PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) res = _PyLong_AsByteArray(v, buffer, n, little_endian, signed_, 0); } - // NOTE: res should only be <0 if a _PyLong_AsByteArray has failed without - // setting an exception, or we're requesting the size of a non-compact int. - // If you add another reason, you should update this! + /* NOTE: res should only be <0 if a _PyLong_AsByteArray has failed without + * setting an exception, or we're requesting the size of a non-compact int. + * If you add another reason, you should update this! */ if (res < 0) { // More efficient calculation for number of bytes required? size_t nb = _PyLong_NumBits((PyObject *)v) + (signed_ ? 1 : 0); @@ -1251,16 +1240,64 @@ PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) return res; } +int +PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) +{ + int signed_; + int little_endian; + + // option 1 is excluded and always raises + if (!options || options & 0x01) { + PyErr_SetString(PyExc_SystemError, "invalid 'options' value"); + return -1; + } + + switch (options & (PYLONG_ASBYTEARRAY_SIGNED | PYLONG_ASBYTEARRAY_UNSIGNED)) { + case PYLONG_ASBYTEARRAY_SIGNED: + signed_ = 1; + break; + case PYLONG_ASBYTEARRAY_UNSIGNED: + signed_ = 0; + break; + case 0: + PyErr_SetString(PyExc_SystemError, "invalid 'options' value - no sign option"); + return -1; + default: + PyErr_SetString(PyExc_SystemError, "invalid 'options' value"); + return -1; + } + + switch (options & (PYLONG_ASBYTEARRAY_LITTLE_ENDIAN | PYLONG_ASBYTEARRAY_BIG_ENDIAN + | PYLONG_ASBYTEARRAY_NATIVE_ENDIAN)) { + case PYLONG_ASBYTEARRAY_BIG_ENDIAN: + little_endian = 0; + break; + case PYLONG_ASBYTEARRAY_LITTLE_ENDIAN: + little_endian = 1; + break; + case PYLONG_ASBYTEARRAY_NATIVE_ENDIAN: + little_endian = PY_LITTLE_ENDIAN; + break; + case 0: + PyErr_SetString(PyExc_SystemError, "invalid 'options' value - no endian option"); + return -1; + default: + PyErr_SetString(PyExc_SystemError, "invalid 'options' value"); + return -1; + } + return _PyLong_AsByteArrayWithOptions(vv, buffer, n, signed_, little_endian); +} + int PyLong_AsByteArray(PyObject* vv, void* buffer, size_t n) { - return PyLong_AsByteArrayWithOptions(vv, buffer, n, PYLONG_ASBYTEARRAY_SIGNED); + return _PyLong_AsByteArrayWithOptions(vv, buffer, n, 1, PY_LITTLE_ENDIAN); } int PyLong_AsUnsignedByteArray(PyObject* vv, void* buffer, size_t n) { - return PyLong_AsByteArrayWithOptions(vv, buffer, n, PYLONG_ASBYTEARRAY_UNSIGNED); + return _PyLong_AsByteArrayWithOptions(vv, buffer, n, 0, PY_LITTLE_ENDIAN); } From 22f3d147165f676940f3a997d0afe1f98e1c94d2 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 15:25:29 +0000 Subject: [PATCH 04/17] Rework into PyLong_CopyBits --- Include/cpython/longobject.h | 43 +++----- Lib/test/test_capi/test_long.py | 184 ++++++++++++-------------------- Modules/_testcapi/long.c | 65 +---------- Objects/longobject.c | 174 ++++++++---------------------- 4 files changed, 137 insertions(+), 329 deletions(-) diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 87f172db6d66aa..ea6560e6525f55 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,34 +4,23 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); -/* PyLong_AsByteArray: Copy the integer value to a native address. - n is the number of bytes available in the buffer. - Uses the current build's default endianness. - PyLong_AsByteArray ensures the MSB matches the int's sign. - PyLong_AsUnsignedByteArray allows the MSB to be set for a - positive Python value provided no significant bits are lost - (that is, all higher bits are a sign extension of the MSB). - Negative values still sign extend to fill the buffer and are - guaranteed to have MSB set. Use PyLong_AsByteArray to guarantee - that the MSB is set iff the int was negative. +/* PyLong_CopyBits: Copy the integer value to a native buffer. + n_bytes is the number of bytes available in the buffer. Pass 0 to request + the required size for the value. + endianness is -1 for native endian, 0 for big endian or 1 for little. - Returns 0 on success or -1 with an exception set, but if the buffer is - not big enough, returns the desired buffer size without setting an - exception. Note that the desired size may be larger than strictly - necessary to avoid precise calculations. */ -PyAPI_FUNC(int) PyLong_AsByteArray(PyObject* v, void* buffer, size_t n); -PyAPI_FUNC(int) PyLong_AsUnsignedByteArray(PyObject* v, void* buffer, size_t n); -PyAPI_FUNC(int) PyLong_AsByteArrayWithOptions(PyObject* v, void* buffer, - size_t n, int options); - -/* Deliberately avoiding values 0 and 1 to avoid letting people - accidentally pass PY_LITTLE_ENDIAN as a constant. */ -#define PYLONG_ASBYTEARRAY_LITTLE_ENDIAN 0x04 -#define PYLONG_ASBYTEARRAY_BIG_ENDIAN 0x08 -#define PYLONG_ASBYTEARRAY_NATIVE_ENDIAN (0x04|0x08) - -#define PYLONG_ASBYTEARRAY_SIGNED 0x10 -#define PYLONG_ASBYTEARRAY_UNSIGNED 0x20 + If an exception is raised, returns a negative value. + Otherwise, returns the number of bytes that are required to store the value. + To check that the full value is represented, ensure that the return value is + equal or less than n_bytes. + All n_bytes are guaranteed to be written (unless an exception occurs), and + so ignoring a positive return value is the equivalent of a downcast in C. + In cases where the full value could not be represented, the returned value + may be larger than necessary - this function is not an accurate way to + calculate the bit length of an integer object. + */ +PyAPI_FUNC(int) PyLong_CopyBits(PyObject* v, void* buffer, size_t n_bytes, + int endianness); /* PyLong_FromByteArray: Create an integer value containing the number from a native buffer. diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index bbabdb69b10689..9d1282e3703341 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -424,137 +424,95 @@ def test_long_asvoidptr(self): self.assertRaises(OverflowError, asvoidptr, -2**1000) # CRASHES asvoidptr(NULL) - def test_long_asbytearray(self): + def test_long_copybits(self): import math from _testcapi import ( - pylong_asbytearray as asbytearray, - pylong_asunsignedbytearray as asunsignedbytearray, - pylong_asbytearraywithoptions as asbytearraywithoptions, + pylong_copybits as copybits, SIZE_MAX, - PYLONG_ASBYTEARRAY_NATIVE_ENDIAN, - PYLONG_ASBYTEARRAY_LITTLE_ENDIAN, - PYLONG_ASBYTEARRAY_BIG_ENDIAN, - PYLONG_ASBYTEARRAY_SIGNED, - PYLONG_ASBYTEARRAY_UNSIGNED, ) def log2(x): return math.log(x) / math.log(2) - SIZEOF_SIZE = int(math.ceil(log2(SIZE_MAX + 1)) / 8) - MAX_SSIZE = 2 ** (SIZEOF_SIZE * 8 - 1) - 1 - MAX_USIZE = 2 ** (SIZEOF_SIZE * 8) - 1 + # Abbreviated sizeof(Py_ssize_t) because we use it a lot + SZ = int(math.ceil(log2(SIZE_MAX + 1)) / 8) + MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 + MAX_USIZE = 2 ** (SZ * 8) - 1 if support.verbose: - print(f"{SIZEOF_SIZE=}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") - - U_LE = PYLONG_ASBYTEARRAY_UNSIGNED | PYLONG_ASBYTEARRAY_LITTLE_ENDIAN - U_BE = PYLONG_ASBYTEARRAY_UNSIGNED | PYLONG_ASBYTEARRAY_BIG_ENDIAN - S_LE = PYLONG_ASBYTEARRAY_SIGNED | PYLONG_ASBYTEARRAY_LITTLE_ENDIAN - S_BE = PYLONG_ASBYTEARRAY_SIGNED | PYLONG_ASBYTEARRAY_BIG_ENDIAN - - # Ensure options of 0 and 1 raise - with self.assertRaises(SystemError): - asbytearraywithoptions(0, bytearray(1), 0, 0) - with self.assertRaises(SystemError): - asbytearraywithoptions(0, bytearray(1), 0, 1) - - # These tests request the required buffer size for both unsigned and - # signed options. - for v, expect_u, expect_s in [ - (0, SIZEOF_SIZE, SIZEOF_SIZE), - (512, SIZEOF_SIZE, SIZEOF_SIZE), - (-512, SIZEOF_SIZE, SIZEOF_SIZE), - (MAX_SSIZE, SIZEOF_SIZE, SIZEOF_SIZE), - (MAX_USIZE, SIZEOF_SIZE, SIZEOF_SIZE + 1), - (-MAX_SSIZE, SIZEOF_SIZE, SIZEOF_SIZE), - (-MAX_USIZE, SIZEOF_SIZE, SIZEOF_SIZE + 1), - (2**255-1, 32, 32), - (-(2**255-1), 32, 32), - (2**256-1, 32, 33), - (-(2**256-1), 32, 33), + print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") + + # These tests check that the requested buffer size is correct + for v, expect in [ + (0, SZ), + (512, SZ), + (-512, SZ), + (MAX_SSIZE, SZ), + (MAX_USIZE, SZ + 1), + (-MAX_SSIZE, SZ), + (-MAX_USIZE, SZ + 1), + (2**255-1, 32), + (-(2**255-1), 32), + (2**256-1, 33), + (-(2**256-1), 33), ]: with self.subTest(f"sizeof-{v:X}"): buffer = bytearray(1) - self.assertEqual(expect_u, asunsignedbytearray(v, buffer, 0), - "PyLong_AsUnsignedByteArray(v, NULL, 0)") - self.assertEqual(expect_s, asbytearray(v, buffer, 0), - "PyLong_AsByteArray(v, NULL, 0)") - self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, U_LE), - "PyLong_AsByteArrayWithOptions(v, NULL, 0, unsigned|little)") - self.assertEqual(expect_u, asbytearraywithoptions(v, buffer, 0, U_BE), - "PyLong_AsByteArrayWithOptions(v, NULL, 0, unsigned|big)") - self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, S_LE), - "PyLong_AsByteArrayWithOptions(v, NULL, 0, signed|little)") - self.assertEqual(expect_s, asbytearraywithoptions(v, buffer, 0, S_BE), - "PyLong_AsByteArrayWithOptions(v, NULL, 0, signed|big)") - - # We request as many bytes as `expect_be` contains, so tests for the - # same value may test different things with a longer expected result. - for v, expect_be, signed_fails in [ - (0, b'\x00', False), - (0, b'\x00' * 2, False), - (0, b'\x00' * 8, False), - (1, b'\x01', False), - (1, b'\x00' * 10 + b'\x01', False), - (42, b'\x2a', False), - (42, b'\x00' * 10 + b'\x2a', False), - (-1, b'\xff', False), - (-1, b'\xff' * 10, False), - (-42, b'\xd6', False), - (-42, b'\xff' * 10 + b'\xd6', False), - # Only unsigned will extract 255 into a single byte - (255, b'\xff', True), - (255, b'\x00\xff', False), - (256, b'\x01\x00', False), - (2**63, b'\x80\x00\x00\x00\x00\x00\x00\x00', True), - (-2**63, b'\x80\x00\x00\x00\x00\x00\x00\x00', False), - (2**63, b'\x00\x80\x00\x00\x00\x00\x00\x00\x00', False), - (-2**63, b'\xff\x80\x00\x00\x00\x00\x00\x00\x00', False), - (2**255-1, b'\x7f' + b'\xff' * 31, False), - (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', False), - (-(2**255-1), b'\xff\x80' + b'\x00' * 30 + b'\x01', False), - # Cannot extract 256 bits into 32 bytes as signed ... - (2**256-1, b'\xff' * 32, True), - # ... but can extract into 33 bytes (zero extended) - (2**256-1, b'\x00' + b'\xff' * 32, False), - - # Unsigned extract of this negative number to 32 bytes will succeed, - # sacrificing the sign bit and overall magnitude, just like a C - # cast (int256_t)0x1_0000_..._0001 would. But signed extract fails - (-(2**256-1), b'\x00' * 31 + b'\x01', True), - # Extract to 33 bytes preserves the top-most bits in all cases. - (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', False), + self.assertEqual(expect, copybits(v, buffer, 0, -1), + "PyLong_CopyBits(v, NULL, 0, -1)") + + # We request as many bytes as `expect_be` contains, and always check + # the result (both big and little endian). We check the return value + # independently, since the buffer should always be filled correctly even + # if we need more bytes + for v, expect_be, expect_n in [ + (0, b'\x00', 1), + (0, b'\x00' * 2, 2), + (0, b'\x00' * 8, 8), + (1, b'\x01', 1), + (1, b'\x00' * 10 + b'\x01', min(11, SZ)), + (42, b'\x2a', 1), + (42, b'\x00' * 10 + b'\x2a', min(11, SZ)), + (-1, b'\xff', 1), + (-1, b'\xff' * 10, min(11, SZ)), + (-42, b'\xd6', 1), + (-42, b'\xff' * 10 + b'\xd6', min(11, SZ)), + # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t) + (255, b'\xff', SZ), + (255, b'\x00\xff', 2), + (256, b'\x01\x00', 2), + # Extracts successfully (unsigned), but requests 9 bytes + (2**63, b'\x80' + b'\x00' * 7, 9), + # "Extracts", but requests 9 bytes + (-2**63, b'\x80' + b'\x00' * 7, 9), + (2**63, b'\x00\x80' + b'\x00' * 7, 9), + (-2**63, b'\xff\x80' + b'\x00' * 7, 9), + + (2**255-1, b'\x7f' + b'\xff' * 31, 32), + (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', 32), + # Request extra bytes, but result says we only needed 32 + (-(2**255-1), b'\xff\x80' + b'\x00' * 30 + b'\x01', 32), + (-(2**255-1), b'\xff\xff\x80' + b'\x00' * 30 + b'\x01', 32), + + # Extracting 256 bits of integer will request 33 bytes, but still + # copy as many bits as possible into the buffer. So we *can* copy + # into a 32-byte buffer, though negative number may be unrecoverable + (2**256-1, b'\xff' * 32, 33), + (2**256-1, b'\x00' + b'\xff' * 32, 33), + (-(2**256-1), b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), ]: with self.subTest(f"{v:X}-{len(expect_be)}bytes"): n = len(expect_be) buffer = bytearray(n) expect_le = expect_be[::-1] - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, U_LE), - f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, unsigned+little)") - self.assertEqual(expect_le, buffer[:n], "unsigned+little") - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, U_BE), - f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, unsigned+big)") - self.assertEqual(expect_be, buffer[:n], "unsigned+big") - - if signed_fails: - self.assertEqual( - max(n + 1, SIZEOF_SIZE), - asbytearraywithoptions(v, buffer, n, S_LE), - f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+little)", - ) - self.assertEqual( - max(n + 1, SIZEOF_SIZE), - asbytearraywithoptions(v, buffer, n, S_BE), - f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+big)", - ) - else: - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, S_LE), - f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+little)") - self.assertEqual(expect_le, buffer[:n], "signed+little") - self.assertEqual(0, asbytearraywithoptions(v, buffer, n, S_BE), - f"PyLong_AsByteArrayWithOptions(v, buffer, {n}, signed+big)") - self.assertEqual(expect_be, buffer[:n], "signed+big") + self.assertEqual(expect_n, copybits(v, buffer, n, 0), + f"PyLong_CopyBits(v, buffer, {n}, )") + self.assertEqual(expect_be, buffer[:n], "") + self.assertEqual(expect_n, copybits(v, buffer, n, 1), + f"PyLong_CopyBits(v, buffer, {n}, )") + self.assertEqual(expect_le, buffer[:n], "") if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 94515338b39478..8273ac586f66bc 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -777,12 +777,12 @@ pylong_asvoidptr(PyObject *module, PyObject *arg) } static PyObject * -pylong_asbytearray(PyObject *module, PyObject *args) +pylong_copybits(PyObject *module, PyObject *args) { PyObject *v; Py_buffer buffer; - Py_ssize_t n; - if (!PyArg_ParseTuple(args, "Ow*n", &v, &buffer, &n)) { + Py_ssize_t n, endianness; + if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) { return NULL; } if (buffer.readonly) { @@ -795,55 +795,7 @@ pylong_asbytearray(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - int res = PyLong_AsByteArray(v, buffer.buf, n); - PyBuffer_Release(&buffer); - return res >= 0 ? PyLong_FromLong(res) : NULL; -} - -static PyObject * -pylong_asunsignedbytearray(PyObject *module, PyObject *args) -{ - PyObject *v; - Py_buffer buffer; - Py_ssize_t n; - if (!PyArg_ParseTuple(args, "Ow*n", &v, &buffer, &n)) { - return NULL; - } - if (buffer.readonly) { - PyErr_SetString(PyExc_TypeError, "buffer must be writable"); - PyBuffer_Release(&buffer); - return NULL; - } - if (buffer.len < n) { - PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); - PyBuffer_Release(&buffer); - return NULL; - } - int res = PyLong_AsUnsignedByteArray(v, buffer.buf, n); - PyBuffer_Release(&buffer); - return res >= 0 ? PyLong_FromLong(res) : NULL; -} - -static PyObject * -pylong_asbytearraywithoptions(PyObject *module, PyObject *args) -{ - PyObject *v; - Py_buffer buffer; - Py_ssize_t n, options; - if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &options)) { - return NULL; - } - if (buffer.readonly) { - PyErr_SetString(PyExc_TypeError, "buffer must be writable"); - PyBuffer_Release(&buffer); - return NULL; - } - if (buffer.len < n) { - PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); - PyBuffer_Release(&buffer); - return NULL; - } - int res = PyLong_AsByteArrayWithOptions(v, buffer.buf, n, (int)options); + int res = PyLong_CopyBits(v, buffer.buf, n, (int)endianness); PyBuffer_Release(&buffer); return res >= 0 ? PyLong_FromLong(res) : NULL; } @@ -876,9 +828,7 @@ static PyMethodDef test_methods[] = { {"pylong_as_size_t", pylong_as_size_t, METH_O}, {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, - {"pylong_asbytearray", pylong_asbytearray, METH_VARARGS}, - {"pylong_asunsignedbytearray", pylong_asunsignedbytearray, METH_VARARGS}, - {"pylong_asbytearraywithoptions", pylong_asbytearraywithoptions, METH_VARARGS}, + {"pylong_copybits", pylong_copybits, METH_VARARGS}, {NULL}, }; @@ -888,10 +838,5 @@ _PyTestCapi_Init_Long(PyObject *mod) if (PyModule_AddFunctions(mod, test_methods) < 0) { return -1; } - if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_NATIVE_ENDIAN)) return -1; - if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_LITTLE_ENDIAN)) return -1; - if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_BIG_ENDIAN)) return -1; - if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_SIGNED)) return -1; - if (PyModule_AddIntMacro(mod, PYLONG_ASBYTEARRAY_UNSIGNED)) return -1; return 0; } diff --git a/Objects/longobject.c b/Objects/longobject.c index 7df231782f1ce6..895cfdd8207c98 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -970,7 +970,12 @@ _PyLong_AsByteArray(PyLongObject* v, /* Copy over all the Python digits. It's crucial that every Python digit except for the MSD contribute exactly PyLong_SHIFT bits to the total, so first assert that the int is - normalized. */ + normalized. + NOTE: PyLong_CopyBits() assumes that this function will fill in 'n' bytes + even if it eventually fails to convert the whole number. Make sure you + account for that if you are changing this algorithm to return without + doing that. + */ assert(ndigits == 0 || v->long_value.ob_digit[ndigits - 1] != 0); j = 0; accum = 0; @@ -1064,22 +1069,19 @@ _PyLong_AsByteArray(PyLongObject* v, // Refactored out for readability, not reuse static inline int -_fits_in_n_bits(Py_ssize_t v, Py_ssize_t n, int require_sign_bit) +_fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) { - if (n > (Py_ssize_t)sizeof(Py_ssize_t) * 8) { + if (n >= (Py_ssize_t)sizeof(Py_ssize_t) * 8) { return 1; } // If all bits above n are the same, we fit. // (Use n-1 if we require the sign bit to be consistent.) - Py_ssize_t v_extended = v >> ((int)n - (require_sign_bit ? 1 : 0)); + Py_ssize_t v_extended = v >> ((int)n - 1); return v_extended == 0 || v_extended == -1; } -// Private function means we can add more options safely. -// The public API parses a flags parameter to extract the requested settings. static int -_PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, - int signed_, int little_endian) +PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) { PyLongObject *v; union { @@ -1094,6 +1096,20 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, return -1; } + if ((int)n != n || (int)n < 0) { + PyErr_SetString(PyExc_SystemError, "n_bytes too big to copy"); + return -1; + } + + int little_endian = endianness; + if (endianness < 0) { + endianness = PY_LITTLE_ENDIAN; + } + if (endianness != 0 && endianness != 1) { + PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + return -1; + } + if (PyLong_Check(vv)) { v = (PyLongObject *)vv; } @@ -1108,22 +1124,16 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, if (_PyLong_IsCompact(v)) { res = 0; cv.v = _PyLong_CompactValue(v); + /* Most paths result in res = sizeof(compact value). Only the case + * where 0 < n < sizeof(compact value) do we need to check and adjust + * our return value. */ + res = sizeof(cv.b); if (n <= 0) { - /* Caller only requested the size, so we'll say we need - * Py_ssize_t bytes */ - res = sizeof(cv.b); - } - else if (n == sizeof(cv.b)) { - memcpy(buffer, cv.b, n); + // nothing to do! } - else if (n < sizeof(cv.b)) { - if (!_fits_in_n_bits(cv.v, n * 8, signed_)) { - /* Value is too big for 'n' bytes, so we'll say we need - * at least Py_ssize_t bytes */ - res = sizeof(cv.b); - } + else if (n <= sizeof(cv.b)) { #if PY_LITTLE_ENDIAN - else if (little_endian) { + if (little_endian) { memcpy(buffer, cv.b, n); } else { @@ -1132,7 +1142,7 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, } } #else - else if (little_endian) { + if (little_endian) { for (size_t i = 0; i < n; ++i) { ((unsigned char*)buffer)[i] = cv.b[sizeof(cv.b) - i - 1]; } @@ -1141,6 +1151,11 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, memcpy(buffer, &cv.b[sizeof(cv.b) - n], n); } #endif + + /* If we fit, return the requested number of bytes */ + if (_fits_in_n_bits(cv.v, n * 8)) { + res = (int)n; + } } else { unsigned char fill = cv.v < 0 ? 0xFF : 0x00; @@ -1175,55 +1190,17 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, #endif } } - else if (n <= 0) { - res = -1; - } - else if (!signed_ && _PyLong_IsNegative(v)) { - /* This case is not supported by _PyLong_AsByteArray, so we extract a - * signed value, to a larger buffer first if needed. If we use a larger - * buffer, the extra data should be all bits set. If it is, then we can - * ignore it and return the number of bytes the caller requested - * (that's what the "unsigned" means). Otherwise, we need to return the - * actual number of bytes required. */ - res = _PyLong_AsByteArray(v, buffer, n, little_endian, 1, 0); - if (res < 0) { - unsigned char *b = (unsigned char *)PyMem_Malloc(n + 1); - if (!b) { - res = -1; - goto error; - } - res = _PyLong_AsByteArray(v, b, n + 1, little_endian, 1, 0); - if (res == 0) { - // Ensure the extra byte is 0xFF - if (little_endian) { - // (remember, we allocated n+1 bytes) - if (b[n] != 0xFF) { - res = -1; - } else { - memcpy(buffer, b, n); - } - } else { - if (b[0] != 0xFF) { - res = -1; - } else { - memcpy(buffer, &b[1], n); - } - } - } - PyMem_Free(b); - } - } else { - res = _PyLong_AsByteArray(v, buffer, n, little_endian, signed_, 0); - } + if (n > 0) { + _PyLong_AsByteArray(v, buffer, n, little_endian, 1, 0); + } - /* NOTE: res should only be <0 if a _PyLong_AsByteArray has failed without - * setting an exception, or we're requesting the size of a non-compact int. - * If you add another reason, you should update this! */ - if (res < 0) { // More efficient calculation for number of bytes required? - size_t nb = _PyLong_NumBits((PyObject *)v) + (signed_ ? 1 : 0); - size_t n_needed = ((nb - 1) / 8) + 1; + size_t nb = _PyLong_NumBits((PyObject *)v); + /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up + * multiples of 8 to the next byte, but we add an implied bit for + * the sign and it cancels out. */ + size_t n_needed = (nb / 8) + 1; res = (int)n_needed; if (res != n_needed) { PyErr_SetString(PyExc_OverflowError, @@ -1232,7 +1209,6 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, } } -error: if (do_decref) { Py_DECREF(v); } @@ -1240,66 +1216,6 @@ _PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, return res; } -int -PyLong_AsByteArrayWithOptions(PyObject* vv, void* buffer, size_t n, int options) -{ - int signed_; - int little_endian; - - // option 1 is excluded and always raises - if (!options || options & 0x01) { - PyErr_SetString(PyExc_SystemError, "invalid 'options' value"); - return -1; - } - - switch (options & (PYLONG_ASBYTEARRAY_SIGNED | PYLONG_ASBYTEARRAY_UNSIGNED)) { - case PYLONG_ASBYTEARRAY_SIGNED: - signed_ = 1; - break; - case PYLONG_ASBYTEARRAY_UNSIGNED: - signed_ = 0; - break; - case 0: - PyErr_SetString(PyExc_SystemError, "invalid 'options' value - no sign option"); - return -1; - default: - PyErr_SetString(PyExc_SystemError, "invalid 'options' value"); - return -1; - } - - switch (options & (PYLONG_ASBYTEARRAY_LITTLE_ENDIAN | PYLONG_ASBYTEARRAY_BIG_ENDIAN - | PYLONG_ASBYTEARRAY_NATIVE_ENDIAN)) { - case PYLONG_ASBYTEARRAY_BIG_ENDIAN: - little_endian = 0; - break; - case PYLONG_ASBYTEARRAY_LITTLE_ENDIAN: - little_endian = 1; - break; - case PYLONG_ASBYTEARRAY_NATIVE_ENDIAN: - little_endian = PY_LITTLE_ENDIAN; - break; - case 0: - PyErr_SetString(PyExc_SystemError, "invalid 'options' value - no endian option"); - return -1; - default: - PyErr_SetString(PyExc_SystemError, "invalid 'options' value"); - return -1; - } - return _PyLong_AsByteArrayWithOptions(vv, buffer, n, signed_, little_endian); -} - -int -PyLong_AsByteArray(PyObject* vv, void* buffer, size_t n) -{ - return _PyLong_AsByteArrayWithOptions(vv, buffer, n, 1, PY_LITTLE_ENDIAN); -} - -int -PyLong_AsUnsignedByteArray(PyObject* vv, void* buffer, size_t n) -{ - return _PyLong_AsByteArrayWithOptions(vv, buffer, n, 0, PY_LITTLE_ENDIAN); -} - /* Create a new int object from a C pointer */ From c30e65a550ceb3ee4e112ef5bb70e3c91b3ffa53 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 15:37:26 +0000 Subject: [PATCH 05/17] Not static --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 895cfdd8207c98..dbc587bcb9a2ea 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1080,7 +1080,7 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) return v_extended == 0 || v_extended == -1; } -static int +int PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) { PyLongObject *v; From 9661b65043a56fc9715e75e487519947b8488372 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 15:42:11 +0000 Subject: [PATCH 06/17] Add motivating example to tests --- Lib/test/test_capi/test_long.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 9d1282e3703341..42aa0f7613418d 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -501,6 +501,12 @@ def log2(x): (-(2**256-1), b'\x00' * 31 + b'\x01', 33), (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), + + # The classic "Windows HRESULT as negative number" case + # HRESULT hr; + # PyLong_CopyBits(<-2147467259>, &hr, sizeof(HRESULT)) + # assert(hr == E_FAIL) + (-2147467259, b'\x80\x00\x40\x05', 4), ]: with self.subTest(f"{v:X}-{len(expect_be)}bytes"): n = len(expect_be) From 79d4942fed914342639b43b44b4c40aeac431105 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 16:02:56 +0000 Subject: [PATCH 07/17] Add FromBits functions --- Include/cpython/longobject.h | 18 +++++++------- Lib/test/test_capi/test_long.py | 41 +++++++++++++++++++++++------- Modules/_testcapi/long.c | 22 +++++++++++++++++ Objects/longobject.c | 44 +++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 18 deletions(-) diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index ea6560e6525f55..7245a575168a4e 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -22,17 +22,17 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); PyAPI_FUNC(int) PyLong_CopyBits(PyObject* v, void* buffer, size_t n_bytes, int endianness); -/* PyLong_FromByteArray: Create an integer value containing the number from - a native buffer. - n is the number of bytes to read from the buffer. - Uses the current build's default endianness, and assumes the value was - sign extended to 'n' bytes. - PyLong_FromUnsignedByteArray assumes the value was zero extended, and - even if the MSB is set the resulting int will be positive. +/* PyLong_FromBits: Create an int value from a native integer + n_bytes is the number of bytes to read from the buffer. Passing 0 will + always produce the zero int. + PyLong_FromUnsignedBits always produces a positive int. + endianness is -1 for native endian, 0 for big endian or 1 for little. Returns the int object, or NULL with an exception set. */ -PyAPI_FUNC(PyObject) PyLong_FromByteArray(void* buffer, size_t n); -PyAPI_FUNC(PyObject) PyLong_FromUnsignedByteArray(void* buffer, size_t n); +PyAPI_FUNC(PyObject*) PyLong_FromBits(void* buffer, size_t n_bytes, + int endianness); +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedBits(void* buffer, size_t n_bytes, + int endianness); PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 42aa0f7613418d..01d40222677765 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -426,16 +426,10 @@ def test_long_asvoidptr(self): def test_long_copybits(self): import math - from _testcapi import ( - pylong_copybits as copybits, - SIZE_MAX, - ) + from _testcapi import pylong_copybits as copybits, SIZE_MAX - def log2(x): - return math.log(x) / math.log(2) - - # Abbreviated sizeof(Py_ssize_t) because we use it a lot - SZ = int(math.ceil(log2(SIZE_MAX + 1)) / 8) + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 MAX_USIZE = 2 ** (SZ * 8) - 1 if support.verbose: @@ -520,5 +514,34 @@ def log2(x): f"PyLong_CopyBits(v, buffer, {n}, )") self.assertEqual(expect_le, buffer[:n], "") + def test_long_frombits(self): + import math + from _testcapi import pylong_frombits as frombits, SIZE_MAX + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 + MAX_USIZE = 2 ** (SZ * 8) - 1 + + for v_be, expect_s, expect_u in [ + (b'\x00', 0, 0), + (b'\x01', 1, 1), + (b'\xff', -1, 255), + (b'\x00\xff', 255, 255), + (b'\xff\xff', -1, 65535), + ]: + with self.subTest(f"{expect_s}-{expect_u:X}-{len(v_be)}bytes"): + n = len(v_be) + v_le = v_be[::-1] + + self.assertEqual(expect_s, frombits(v_be, n, 0, 1), + f"PyLong_FromBits(buffer, {n}, )") + self.assertEqual(expect_s, frombits(v_le, n, 1, 1), + f"PyLong_FromBits(buffer, {n}, )") + self.assertEqual(expect_u, frombits(v_be, n, 0, 0), + f"PyLong_FromUnsignedBits(buffer, {n}, )") + self.assertEqual(expect_u, frombits(v_le, n, 1, 0), + f"PyLong_FromUnsignedBits(buffer, {n}, )") + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 8273ac586f66bc..ac569bcaf4cdf0 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -800,6 +800,27 @@ pylong_copybits(PyObject *module, PyObject *args) return res >= 0 ? PyLong_FromLong(res) : NULL; } +static PyObject * +pylong_frombits(PyObject *module, PyObject *args) +{ + Py_buffer buffer; + Py_ssize_t n, endianness, signed_; + if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) { + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + PyObject *res = signed_ + ? PyLong_FromBits(buffer.buf, n, (int)endianness) + : PyLong_FromUnsignedBits(buffer.buf, n, (int)endianness); + PyBuffer_Release(&buffer); + return res; +} + + static PyMethodDef test_methods[] = { _TESTCAPI_TEST_LONG_AND_OVERFLOW_METHODDEF _TESTCAPI_TEST_LONG_API_METHODDEF @@ -829,6 +850,7 @@ static PyMethodDef test_methods[] = { {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, {"pylong_copybits", pylong_copybits, METH_VARARGS}, + {"pylong_frombits", pylong_frombits, METH_VARARGS}, {NULL}, }; diff --git a/Objects/longobject.c b/Objects/longobject.c index dbc587bcb9a2ea..b8054713028e15 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1217,6 +1217,50 @@ PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) } +PyObject * +PyLong_FromBits(const void* buffer, size_t n, int endianness) +{ + if (!buffer) { + PyErr_BadInternalCall(); + return NULL; + } + + int little_endian = endianness; + if (endianness < 0) { + endianness = PY_LITTLE_ENDIAN; + } + if (endianness != 0 && endianness != 1) { + PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + return NULL; + } + + return _PyLong_FromByteArray((const unsigned char *)buffer, n, + little_endian, 1); +} + + +PyObject * +PyLong_FromUnsignedBits(void* buffer, size_t n, int endianness) +{ + if (!buffer) { + PyErr_BadInternalCall(); + return NULL; + } + + int little_endian = endianness; + if (endianness < 0) { + endianness = PY_LITTLE_ENDIAN; + } + if (endianness != 0 && endianness != 1) { + PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + return NULL; + } + + return _PyLong_FromByteArray((const unsigned char *)buffer, n, + little_endian, 0); +} + + /* Create a new int object from a C pointer */ PyObject * From bc9e48c5781b17ddc09fdf803cd69478bfddd4f3 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 17:00:31 +0000 Subject: [PATCH 08/17] A test and a const --- Include/cpython/longobject.h | 4 ++-- Lib/test/test_capi/test_long.py | 2 +- Objects/longobject.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 7245a575168a4e..c2a553e43ccc7c 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -29,9 +29,9 @@ PyAPI_FUNC(int) PyLong_CopyBits(PyObject* v, void* buffer, size_t n_bytes, endianness is -1 for native endian, 0 for big endian or 1 for little. Returns the int object, or NULL with an exception set. */ -PyAPI_FUNC(PyObject*) PyLong_FromBits(void* buffer, size_t n_bytes, +PyAPI_FUNC(PyObject*) PyLong_FromBits(const void* buffer, size_t n_bytes, int endianness); -PyAPI_FUNC(PyObject*) PyLong_FromUnsignedBits(void* buffer, size_t n_bytes, +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedBits(const void* buffer, size_t n_bytes, int endianness); PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 01d40222677765..20e2b7e3b17a90 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -461,7 +461,7 @@ def test_long_copybits(self): for v, expect_be, expect_n in [ (0, b'\x00', 1), (0, b'\x00' * 2, 2), - (0, b'\x00' * 8, 8), + (0, b'\x00' * 8, min(8, SZ)), (1, b'\x01', 1), (1, b'\x00' * 10 + b'\x01', min(11, SZ)), (42, b'\x2a', 1), diff --git a/Objects/longobject.c b/Objects/longobject.c index b8054713028e15..66022df3c43f7f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1096,7 +1096,7 @@ PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) return -1; } - if ((int)n != n || (int)n < 0) { + if ((size_t)(int)n != n || (int)n < 0) { PyErr_SetString(PyExc_SystemError, "n_bytes too big to copy"); return -1; } @@ -1202,7 +1202,7 @@ PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) * the sign and it cancels out. */ size_t n_needed = (nb / 8) + 1; res = (int)n_needed; - if (res != n_needed) { + if ((size_t)res != n_needed) { PyErr_SetString(PyExc_OverflowError, "value too large to convert"); res = -1; @@ -1240,7 +1240,7 @@ PyLong_FromBits(const void* buffer, size_t n, int endianness) PyObject * -PyLong_FromUnsignedBits(void* buffer, size_t n, int endianness) +PyLong_FromUnsignedBits(const void* buffer, size_t n, int endianness) { if (!buffer) { PyErr_BadInternalCall(); From b4761be8406d1e7bdf9e0c69936e47f19ff1a32e Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 2 Feb 2024 17:08:29 +0000 Subject: [PATCH 09/17] Space --- Modules/_testcapi/long.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index ac569bcaf4cdf0..9694d3c00eae36 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -813,7 +813,7 @@ pylong_frombits(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - PyObject *res = signed_ + PyObject *res = signed_ ? PyLong_FromBits(buffer.buf, n, (int)endianness) : PyLong_FromUnsignedBits(buffer.buf, n, (int)endianness); PyBuffer_Release(&buffer); From 761db5c11711565c3431a3f0e5f8caadc4c570ca Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 5 Feb 2024 17:12:58 +0000 Subject: [PATCH 10/17] Docs --- Doc/c-api/long.rst | 62 +++++++++++++++++++ Doc/whatsnew/3.13.rst | 6 +- Include/cpython/longobject.h | 2 +- ...-02-05-17-11-15.gh-issue-111140.WMEjid.rst | 2 + 4 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 045604870d3c84..f7488c81d4bd39 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -113,6 +113,28 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`. +.. c:function:: PyObject* PyLong_FromBits(const void* buffer, size_t n_bytes, int endianness) + + Create a Python integer from the value contained in the first *n_bytes* of + *buffer*, interpreted as twos-complement. + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or ``0`` for big endian and ``1`` for little. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyLong_FromUnsignedBits(const void* buffer, size_t n_bytes, int endianness) + + Create a Python integer from the value contained in the first *n_bytes* of + *buffer*, interpreted as an unsigned number. + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or ``0`` for big endian and ``1`` for little. + + .. versionadded:: 3.13 + + .. XXX alias PyLong_AS_LONG (for now) .. c:function:: long PyLong_AsLong(PyObject *obj) @@ -332,6 +354,46 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. +.. c:function:: int PyLong_CopyBits(PyObject *pylong, void* buffer, size_t n_bytes, int endianness) + + Copy the Python integer value to a native *buffer* of size *n_bytes*:: + + int value; + int bytes = PyLong_CopyBits(v, &value, sizeof(value), -1); + if (bytes < 0) { + // Error occurred + return NULL; + } + else if (bytes > sizeof(value)) { + // Overflow occurred, but 'value' contains as much as could fit + } + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or ``0`` for big endian and ``1`` for little. + + Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as + an integer. Otherwise, returns the size of the buffer required to store the + value. If this is equal to or less than *n_bytes*, the entire value was + copied. + + Unless an exception is raised, all *n_bytes* of the buffer will be written + with as much of the value as can fit. This allows the caller to ignore all + non-negative results, if the intent is to match the typical behavior of a + C-style downcast. + + Values are always copied as twos-complement, and sufficient size will be + requested for a sign bit. For example, this may cause an value that fits into + 8-bytes when treated as unsigned to request 9 bytes, even though all eight + bytes were copied into the buffer. + + Passing *n_bytes* of zero will always return the requested buffer size. + The requested buffer size may be larger than necessary, but only when the + larger size is required to contain the full value. This function is not an + accurate way to determine the bit length of a value. + + .. versionadded:: 3.13 + + .. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op) Return 1 if *op* is compact, 0 otherwise. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index c25d41351c2f3c..fff52242c7728d 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1466,6 +1466,11 @@ New Features * Add :c:func:`Py_HashPointer` function to hash a pointer. (Contributed by Victor Stinner in :gh:`111545`.) +* Add :c:func:`PyLong_CopyBits`, :c:func:`PyLong_FromBits` and + :c:func:`PyLong_FromUnsignedBits` functions to simplify converting between + native integer types and Python ``int`` objects. + (Contributed by Steve Dower in :gh:`111140`.) + Porting to Python 3.13 ---------------------- @@ -1525,7 +1530,6 @@ Porting to Python 3.13 platforms, the ``HAVE_STDDEF_H`` macro is only defined on Windows. (Contributed by Victor Stinner in :gh:`108765`.) - Deprecated ---------- diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index c2a553e43ccc7c..94852325c05a57 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -25,7 +25,7 @@ PyAPI_FUNC(int) PyLong_CopyBits(PyObject* v, void* buffer, size_t n_bytes, /* PyLong_FromBits: Create an int value from a native integer n_bytes is the number of bytes to read from the buffer. Passing 0 will always produce the zero int. - PyLong_FromUnsignedBits always produces a positive int. + PyLong_FromUnsignedBits always produces a non-negative int. endianness is -1 for native endian, 0 for big endian or 1 for little. Returns the int object, or NULL with an exception set. */ diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst new file mode 100644 index 00000000000000..c6f45861b703b9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst @@ -0,0 +1,2 @@ +Adds :c:func:`PyLong_CopyBits`, :c:func:`PyLong_FromBits` and +:c:func:`PyLong_FromUnsignedBits` functions. From 6e1a89a41cceaa5e14191b30a5bd231f080661c8 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 7 Feb 2024 20:44:57 +0000 Subject: [PATCH 11/17] Rename to PyLong_AsNativeBytes --- Doc/c-api/long.rst | 8 ++-- Doc/whatsnew/3.13.rst | 7 ++-- Include/cpython/longobject.h | 18 +++++---- Lib/test/test_capi/test_long.py | 39 ++++++++++--------- ...-02-05-17-11-15.gh-issue-111140.WMEjid.rst | 2 + ...-02-05-17-11-15.gh-issue-111140.WMEjid.rst | 2 - Modules/_testcapi/long.c | 14 +++---- Objects/longobject.c | 14 +++---- 8 files changed, 56 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index f7488c81d4bd39..f4c70b951df403 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -113,10 +113,10 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`. -.. c:function:: PyObject* PyLong_FromBits(const void* buffer, size_t n_bytes, int endianness) +.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness) Create a Python integer from the value contained in the first *n_bytes* of - *buffer*, interpreted as twos-complement. + *buffer*, interpreted as a two's-complement signed number. *endianness* may be passed ``-1`` for the native endian that CPython was compiled with, or ``0`` for big endian and ``1`` for little. @@ -124,7 +124,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 -.. c:function:: PyObject* PyLong_FromUnsignedBits(const void* buffer, size_t n_bytes, int endianness) +.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness) Create a Python integer from the value contained in the first *n_bytes* of *buffer*, interpreted as an unsigned number. @@ -354,7 +354,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. -.. c:function:: int PyLong_CopyBits(PyObject *pylong, void* buffer, size_t n_bytes, int endianness) +.. c:function:: int PyLong_AsNativeBytes(PyObject *pylong, void* buffer, size_t n_bytes, int endianness) Copy the Python integer value to a native *buffer* of size *n_bytes*:: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 2782cf78c246b6..bd9b002e13fb60 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -560,6 +560,7 @@ Tier 2 IR by Mark Shannon and Guido van Rossum. Tier 2 optimizer by Ken Jin.) + Deprecated ========== @@ -1490,9 +1491,9 @@ New Features * Add :c:func:`Py_HashPointer` function to hash a pointer. (Contributed by Victor Stinner in :gh:`111545`.) -* Add :c:func:`PyLong_CopyBits`, :c:func:`PyLong_FromBits` and - :c:func:`PyLong_FromUnsignedBits` functions to simplify converting between - native integer types and Python ``int`` objects. +* Add :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and + :c:func:`PyLong_FromUnsignedNativeBytes` functions to simplify converting + between native integer types and Python ``int`` objects. (Contributed by Steve Dower in :gh:`111140`.) diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 94852325c05a57..1675ee6d0dbaf6 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,10 +4,14 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); -/* PyLong_CopyBits: Copy the integer value to a native buffer. +/* PyLong_AsNativeBytes: Copy the integer value to a native variable. + buffer points to the first byte of the variable. n_bytes is the number of bytes available in the buffer. Pass 0 to request the required size for the value. endianness is -1 for native endian, 0 for big endian or 1 for little. + Big endian mode will write the most significant byte into the address + directly referenced by buffer; little endian will write the least significant + byte into that address. If an exception is raised, returns a negative value. Otherwise, returns the number of bytes that are required to store the value. @@ -19,20 +23,20 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); may be larger than necessary - this function is not an accurate way to calculate the bit length of an integer object. */ -PyAPI_FUNC(int) PyLong_CopyBits(PyObject* v, void* buffer, size_t n_bytes, +PyAPI_FUNC(int) PyLong_AsNativeBytes(PyObject* v, void* buffer, size_t n_bytes, int endianness); -/* PyLong_FromBits: Create an int value from a native integer +/* PyLong_FromNativeBytes: Create an int value from a native integer n_bytes is the number of bytes to read from the buffer. Passing 0 will always produce the zero int. - PyLong_FromUnsignedBits always produces a non-negative int. + PyLong_FromUnsignedNativeBytes always produces a non-negative int. endianness is -1 for native endian, 0 for big endian or 1 for little. Returns the int object, or NULL with an exception set. */ -PyAPI_FUNC(PyObject*) PyLong_FromBits(const void* buffer, size_t n_bytes, - int endianness); -PyAPI_FUNC(PyObject*) PyLong_FromUnsignedBits(const void* buffer, size_t n_bytes, +PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness); +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, + size_t n_bytes, int endianness); PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 20e2b7e3b17a90..140c257d8c9ced 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -424,9 +424,9 @@ def test_long_asvoidptr(self): self.assertRaises(OverflowError, asvoidptr, -2**1000) # CRASHES asvoidptr(NULL) - def test_long_copybits(self): + def test_long_asnativebytes(self): import math - from _testcapi import pylong_copybits as copybits, SIZE_MAX + from _testcapi import pylong_asnativebytes as asnativebytes, SIZE_MAX # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) @@ -451,8 +451,8 @@ def test_long_copybits(self): ]: with self.subTest(f"sizeof-{v:X}"): buffer = bytearray(1) - self.assertEqual(expect, copybits(v, buffer, 0, -1), - "PyLong_CopyBits(v, NULL, 0, -1)") + self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), + "PyLong_AsNativeBytes(v, NULL, 0, -1)") # We request as many bytes as `expect_be` contains, and always check # the result (both big and little endian). We check the return value @@ -507,16 +507,19 @@ def test_long_copybits(self): buffer = bytearray(n) expect_le = expect_be[::-1] - self.assertEqual(expect_n, copybits(v, buffer, n, 0), - f"PyLong_CopyBits(v, buffer, {n}, )") + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") self.assertEqual(expect_be, buffer[:n], "") - self.assertEqual(expect_n, copybits(v, buffer, n, 1), - f"PyLong_CopyBits(v, buffer, {n}, )") + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 1), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") self.assertEqual(expect_le, buffer[:n], "") - def test_long_frombits(self): + def test_long_fromnativebytes(self): import math - from _testcapi import pylong_frombits as frombits, SIZE_MAX + from _testcapi import ( + pylong_fromnativebytes as fromnativebytes, + SIZE_MAX, + ) # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) @@ -534,14 +537,14 @@ def test_long_frombits(self): n = len(v_be) v_le = v_be[::-1] - self.assertEqual(expect_s, frombits(v_be, n, 0, 1), - f"PyLong_FromBits(buffer, {n}, )") - self.assertEqual(expect_s, frombits(v_le, n, 1, 1), - f"PyLong_FromBits(buffer, {n}, )") - self.assertEqual(expect_u, frombits(v_be, n, 0, 0), - f"PyLong_FromUnsignedBits(buffer, {n}, )") - self.assertEqual(expect_u, frombits(v_le, n, 1, 0), - f"PyLong_FromUnsignedBits(buffer, {n}, )") + self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_s, fromnativebytes(v_le, n, 1, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, 0, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_le, n, 1, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst new file mode 100644 index 00000000000000..a8aa191b5eb3ba --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst @@ -0,0 +1,2 @@ +Adds :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and +:c:func:`PyLong_FromUnsignedNativeBytes` functions. diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst deleted file mode 100644 index c6f45861b703b9..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst +++ /dev/null @@ -1,2 +0,0 @@ -Adds :c:func:`PyLong_CopyBits`, :c:func:`PyLong_FromBits` and -:c:func:`PyLong_FromUnsignedBits` functions. diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 9694d3c00eae36..1ea7baac57c76e 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -777,7 +777,7 @@ pylong_asvoidptr(PyObject *module, PyObject *arg) } static PyObject * -pylong_copybits(PyObject *module, PyObject *args) +pylong_asnativebytes(PyObject *module, PyObject *args) { PyObject *v; Py_buffer buffer; @@ -795,13 +795,13 @@ pylong_copybits(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - int res = PyLong_CopyBits(v, buffer.buf, n, (int)endianness); + int res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); PyBuffer_Release(&buffer); return res >= 0 ? PyLong_FromLong(res) : NULL; } static PyObject * -pylong_frombits(PyObject *module, PyObject *args) +pylong_fromnativebytes(PyObject *module, PyObject *args) { Py_buffer buffer; Py_ssize_t n, endianness, signed_; @@ -814,8 +814,8 @@ pylong_frombits(PyObject *module, PyObject *args) return NULL; } PyObject *res = signed_ - ? PyLong_FromBits(buffer.buf, n, (int)endianness) - : PyLong_FromUnsignedBits(buffer.buf, n, (int)endianness); + ? PyLong_FromNativeBytes(buffer.buf, n, (int)endianness) + : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)endianness); PyBuffer_Release(&buffer); return res; } @@ -849,8 +849,8 @@ static PyMethodDef test_methods[] = { {"pylong_as_size_t", pylong_as_size_t, METH_O}, {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, - {"pylong_copybits", pylong_copybits, METH_VARARGS}, - {"pylong_frombits", pylong_frombits, METH_VARARGS}, + {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, + {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {NULL}, }; diff --git a/Objects/longobject.c b/Objects/longobject.c index 66022df3c43f7f..1a01123aa7cab1 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -971,9 +971,9 @@ _PyLong_AsByteArray(PyLongObject* v, It's crucial that every Python digit except for the MSD contribute exactly PyLong_SHIFT bits to the total, so first assert that the int is normalized. - NOTE: PyLong_CopyBits() assumes that this function will fill in 'n' bytes - even if it eventually fails to convert the whole number. Make sure you - account for that if you are changing this algorithm to return without + NOTE: PyLong_AsNativeBytes() assumes that this function will fill in 'n' + bytes even if it eventually fails to convert the whole number. Make sure + you account for that if you are changing this algorithm to return without doing that. */ assert(ndigits == 0 || v->long_value.ob_digit[ndigits - 1] != 0); @@ -1081,7 +1081,7 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) } int -PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) +PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) { PyLongObject *v; union { @@ -1097,7 +1097,7 @@ PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) } if ((size_t)(int)n != n || (int)n < 0) { - PyErr_SetString(PyExc_SystemError, "n_bytes too big to copy"); + PyErr_SetString(PyExc_SystemError, "n_bytes too big to convert"); return -1; } @@ -1218,7 +1218,7 @@ PyLong_CopyBits(PyObject* vv, void* buffer, size_t n, int endianness) PyObject * -PyLong_FromBits(const void* buffer, size_t n, int endianness) +PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) { if (!buffer) { PyErr_BadInternalCall(); @@ -1240,7 +1240,7 @@ PyLong_FromBits(const void* buffer, size_t n, int endianness) PyObject * -PyLong_FromUnsignedBits(const void* buffer, size_t n, int endianness) +PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) { if (!buffer) { PyErr_BadInternalCall(); From 7b446485997c3d35db88c1c19bc03714132ee136 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 7 Feb 2024 22:42:35 +0000 Subject: [PATCH 12/17] Buffer overflow in big endian --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 1a01123aa7cab1..e3304d26ede8cc 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1185,7 +1185,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) } else { memset(buffer, fill, n - sizeof(cv.b)); - memcpy((char *)buffer + n - sizeof(cv.b), cv.b, n); + memcpy((char *)buffer + n - sizeof(cv.b), cv.b, sizeof(cv.b)); } #endif } From 79dd452481ea954354ec70a4bfa4768ab9382355 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 8 Feb 2024 22:56:47 +0000 Subject: [PATCH 13/17] Review feedback --- Doc/c-api/long.rst | 9 ++++++--- Doc/whatsnew/3.13.rst | 2 +- Lib/test/test_capi/test_long.py | 31 ++++++++++++++++++++++++++++++- Modules/_testcapi/long.c | 4 +++- Objects/longobject.c | 31 ++++++++++++++++--------------- 5 files changed, 56 insertions(+), 21 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index f4c70b951df403..a1d2d0aaad6dd6 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -387,9 +387,12 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. bytes were copied into the buffer. Passing *n_bytes* of zero will always return the requested buffer size. - The requested buffer size may be larger than necessary, but only when the - larger size is required to contain the full value. This function is not an - accurate way to determine the bit length of a value. + + .. note:: + + When the value does not fit in the provided buffer, the requested size + returned from the function may be larger than necessary. Passing 0 to this + function is not an accurate way to determine the bit length of a value. .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index bd9b002e13fb60..10bb1502ab90af 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1493,7 +1493,7 @@ New Features * Add :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and :c:func:`PyLong_FromUnsignedNativeBytes` functions to simplify converting - between native integer types and Python ``int`` objects. + between native integer types and Python :class:`int` objects. (Contributed by Steve Dower in :gh:`111140`.) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 140c257d8c9ced..767d9d6170a01c 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -426,7 +426,11 @@ def test_long_asvoidptr(self): def test_long_asnativebytes(self): import math - from _testcapi import pylong_asnativebytes as asnativebytes, SIZE_MAX + from _testcapi import ( + pylong_asnativebytes as asnativebytes, + INT_MAX, + SIZE_MAX, + ) # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) @@ -453,6 +457,9 @@ def test_long_asnativebytes(self): buffer = bytearray(1) self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), "PyLong_AsNativeBytes(v, NULL, 0, -1)") + # Also check via the __index__ path + self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1), + "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)") # We request as many bytes as `expect_be` contains, and always check # the result (both big and little endian). We check the return value @@ -514,6 +521,19 @@ def test_long_asnativebytes(self): f"PyLong_AsNativeBytes(v, buffer, {n}, )") self.assertEqual(expect_le, buffer[:n], "") + # Check a few error conditions. These are validated in code, but are + # unspecified in docs, so if we make changes to the implementation, it's + # fine to just update these tests rather than preserve the behaviour. + with self.assertRaises(SystemError): + asnativebytes(1, buffer, 0, 2) + with self.assertRaises(TypeError): + asnativebytes('not a number', buffer, 0, -1) + with self.assertRaises(SystemError): + # n_bytes is taken as size_t and clamped to 'int'. With the sign + # change, we should always be able to pass INT_MAX+1 and see this + # failure. + asnativebytes(1, buffer, INT_MAX + 1, -1) + def test_long_fromnativebytes(self): import math from _testcapi import ( @@ -546,5 +566,14 @@ def test_long_fromnativebytes(self): self.assertEqual(expect_u, fromnativebytes(v_le, n, 1, 0), f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + # Check native endian when the result would be the same either + # way and we can test it. + if v_be == v_le: + self.assertEqual(expect_s, fromnativebytes(v_be, n, -1, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 1ea7baac57c76e..19717ac908a62c 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -790,7 +790,9 @@ pylong_asnativebytes(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - if (buffer.len < n) { + // Allow n > INT_MAX for tests - it will error out without writing to the + // buffer, so no overrun issues. + if (buffer.len < n && n <= INT_MAX) { PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); PyBuffer_Release(&buffer); return NULL; diff --git a/Objects/longobject.c b/Objects/longobject.c index e3304d26ede8cc..94140882ad17c9 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1080,6 +1080,19 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) return v_extended == 0 || v_extended == -1; } +static inline int +resolve_endianness(int *endianness) +{ + if (*endianness < 0) { + *endianness = PY_LITTLE_ENDIAN; + } + if (*endianness != 0 && *endianness != 1) { + PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + return -1; + } + return 0; +} + int PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) { @@ -1102,11 +1115,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) } int little_endian = endianness; - if (endianness < 0) { - endianness = PY_LITTLE_ENDIAN; - } - if (endianness != 0 && endianness != 1) { - PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + if (resolve_endianness(&little_endian) < 0) { return -1; } @@ -1226,11 +1235,7 @@ PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) } int little_endian = endianness; - if (endianness < 0) { - endianness = PY_LITTLE_ENDIAN; - } - if (endianness != 0 && endianness != 1) { - PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + if (resolve_endianness(&little_endian) < 0) { return NULL; } @@ -1248,11 +1253,7 @@ PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) } int little_endian = endianness; - if (endianness < 0) { - endianness = PY_LITTLE_ENDIAN; - } - if (endianness != 0 && endianness != 1) { - PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + if (resolve_endianness(&little_endian) < 0) { return NULL; } From 9693afec76b153306deb817e0ea0082496169d79 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 8 Feb 2024 22:59:09 +0000 Subject: [PATCH 14/17] _resolve_endianness --- Objects/longobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 94140882ad17c9..f006a1fe7955c0 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1081,7 +1081,7 @@ _fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) } static inline int -resolve_endianness(int *endianness) +_resolve_endianness(int *endianness) { if (*endianness < 0) { *endianness = PY_LITTLE_ENDIAN; @@ -1115,7 +1115,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) } int little_endian = endianness; - if (resolve_endianness(&little_endian) < 0) { + if (_resolve_endianness(&little_endian) < 0) { return -1; } @@ -1235,7 +1235,7 @@ PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) } int little_endian = endianness; - if (resolve_endianness(&little_endian) < 0) { + if (_resolve_endianness(&little_endian) < 0) { return NULL; } @@ -1253,7 +1253,7 @@ PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) } int little_endian = endianness; - if (resolve_endianness(&little_endian) < 0) { + if (_resolve_endianness(&little_endian) < 0) { return NULL; } From 376e358689e5fb2514f7c611d4bf2ddf773656a6 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 9 Feb 2024 01:06:50 +0000 Subject: [PATCH 15/17] More reliable test --- Lib/test/test_capi/test_long.py | 12 ++++++------ Modules/_testcapi/long.c | 8 ++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 767d9d6170a01c..3bcb95edbc1220 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -428,8 +428,8 @@ def test_long_asnativebytes(self): import math from _testcapi import ( pylong_asnativebytes as asnativebytes, - INT_MAX, - SIZE_MAX, + pylong_asnativebytes_too_big_n, + SIZE_MAX ) # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot @@ -528,11 +528,11 @@ def test_long_asnativebytes(self): asnativebytes(1, buffer, 0, 2) with self.assertRaises(TypeError): asnativebytes('not a number', buffer, 0, -1) + + # We pass any number we like, but the function will pass an n_bytes + # that is too big to make sure we fail with self.assertRaises(SystemError): - # n_bytes is taken as size_t and clamped to 'int'. With the sign - # change, we should always be able to pass INT_MAX+1 and see this - # failure. - asnativebytes(1, buffer, INT_MAX + 1, -1) + pylong_asnativebytes_too_big_n(100) def test_long_fromnativebytes(self): import math diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 19717ac908a62c..15f5085935541d 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -802,6 +802,13 @@ pylong_asnativebytes(PyObject *module, PyObject *args) return res >= 0 ? PyLong_FromLong(res) : NULL; } +static PyObject * +pylong_asnativebytes_too_big_n(PyObject *module, PyObject *v) +{ + int res = PyLong_AsNativeBytes(v, NULL, (size_t)INT_MAX + 1, -1); + return res >= 0 ? PyLong_FromLong(res) : NULL; +} + static PyObject * pylong_fromnativebytes(PyObject *module, PyObject *args) { @@ -852,6 +859,7 @@ static PyMethodDef test_methods[] = { {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, + {"pylong_asnativebytes_too_big_n", pylong_asnativebytes_too_big_n, METH_O}, {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {NULL}, }; From 1f2cf3abcafe2549d6b1a424d2300833dd9bffe4 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 9 Feb 2024 15:24:49 +0000 Subject: [PATCH 16/17] Doc tweaks --- Doc/c-api/long.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index a1d2d0aaad6dd6..9167e9d140f5e2 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -119,7 +119,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. *buffer*, interpreted as a two's-complement signed number. *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or ``0`` for big endian and ``1`` for little. + compiled with, or else ``0`` for big endian and ``1`` for little. .. versionadded:: 3.13 @@ -130,7 +130,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. *buffer*, interpreted as an unsigned number. *endianness* may be passed ``-1`` for the native endian that CPython was - compiled with, or ``0`` for big endian and ``1`` for little. + compiled with, or else ``0`` for big endian and ``1`` for little. .. versionadded:: 3.13 @@ -371,20 +371,21 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. *endianness* may be passed ``-1`` for the native endian that CPython was compiled with, or ``0`` for big endian and ``1`` for little. - Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as - an integer. Otherwise, returns the size of the buffer required to store the + Return ``-1`` with an exception raised if *pylong* cannot be interpreted as + an integer. Otherwise, return the size of the buffer required to store the value. If this is equal to or less than *n_bytes*, the entire value was copied. Unless an exception is raised, all *n_bytes* of the buffer will be written with as much of the value as can fit. This allows the caller to ignore all - non-negative results, if the intent is to match the typical behavior of a + non-negative results if the intent is to match the typical behavior of a C-style downcast. Values are always copied as twos-complement, and sufficient size will be requested for a sign bit. For example, this may cause an value that fits into - 8-bytes when treated as unsigned to request 9 bytes, even though all eight - bytes were copied into the buffer. + 8 bytes when treated as unsigned to request 9 bytes, even though all eight + bytes were copied into the buffer. What has been omitted is the zero sign + bit, which is redundant when the intention is to treat the value as unsigned. Passing *n_bytes* of zero will always return the requested buffer size. From 39962de0241590f0a9597b1f00df74ee593ac0a4 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 12 Feb 2024 17:47:36 +0000 Subject: [PATCH 17/17] Switch to Py_ssize_t consistently instead of int and size_t --- Doc/c-api/long.rst | 4 ++-- Include/cpython/longobject.h | 4 ++-- Lib/test/test_capi/test_long.py | 8 +------- Modules/_testcapi/long.c | 16 +++------------- Objects/longobject.c | 31 +++++++++++++------------------ 5 files changed, 21 insertions(+), 42 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 06bb2435565804..c39823e5e6787f 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -354,12 +354,12 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. -.. c:function:: int PyLong_AsNativeBytes(PyObject *pylong, void* buffer, size_t n_bytes, int endianness) +.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness) Copy the Python integer value to a native *buffer* of size *n_bytes*:: int value; - int bytes = PyLong_CopyBits(v, &value, sizeof(value), -1); + Py_ssize_t bytes = PyLong_CopyBits(v, &value, sizeof(value), -1); if (bytes < 0) { // Error occurred return NULL; diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 1675ee6d0dbaf6..07251db6bcc203 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -23,8 +23,8 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); may be larger than necessary - this function is not an accurate way to calculate the bit length of an integer object. */ -PyAPI_FUNC(int) PyLong_AsNativeBytes(PyObject* v, void* buffer, size_t n_bytes, - int endianness); +PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, + Py_ssize_t n_bytes, int endianness); /* PyLong_FromNativeBytes: Create an int value from a native integer n_bytes is the number of bytes to read from the buffer. Passing 0 will diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 3bcb95edbc1220..fc82cbfa66ea7a 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -428,8 +428,7 @@ def test_long_asnativebytes(self): import math from _testcapi import ( pylong_asnativebytes as asnativebytes, - pylong_asnativebytes_too_big_n, - SIZE_MAX + SIZE_MAX, ) # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot @@ -529,11 +528,6 @@ def test_long_asnativebytes(self): with self.assertRaises(TypeError): asnativebytes('not a number', buffer, 0, -1) - # We pass any number we like, but the function will pass an n_bytes - # that is too big to make sure we fail - with self.assertRaises(SystemError): - pylong_asnativebytes_too_big_n(100) - def test_long_fromnativebytes(self): import math from _testcapi import ( diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 15f5085935541d..dc21cf9f475228 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -790,23 +790,14 @@ pylong_asnativebytes(PyObject *module, PyObject *args) PyBuffer_Release(&buffer); return NULL; } - // Allow n > INT_MAX for tests - it will error out without writing to the - // buffer, so no overrun issues. - if (buffer.len < n && n <= INT_MAX) { + if (buffer.len < n) { PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); PyBuffer_Release(&buffer); return NULL; } - int res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); + Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); PyBuffer_Release(&buffer); - return res >= 0 ? PyLong_FromLong(res) : NULL; -} - -static PyObject * -pylong_asnativebytes_too_big_n(PyObject *module, PyObject *v) -{ - int res = PyLong_AsNativeBytes(v, NULL, (size_t)INT_MAX + 1, -1); - return res >= 0 ? PyLong_FromLong(res) : NULL; + return res >= 0 ? PyLong_FromSsize_t(res) : NULL; } static PyObject * @@ -859,7 +850,6 @@ static PyMethodDef test_methods[] = { {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, - {"pylong_asnativebytes_too_big_n", pylong_asnativebytes_too_big_n, METH_O}, {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {NULL}, }; diff --git a/Objects/longobject.c b/Objects/longobject.c index f006a1fe7955c0..932111f58425f2 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1093,8 +1093,8 @@ _resolve_endianness(int *endianness) return 0; } -int -PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) +Py_ssize_t +PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) { PyLongObject *v; union { @@ -1102,18 +1102,13 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) unsigned char b[sizeof(Py_ssize_t)]; } cv; int do_decref = 0; - int res = 0; + Py_ssize_t res = 0; - if (vv == NULL) { + if (vv == NULL || n < 0) { PyErr_BadInternalCall(); return -1; } - if ((size_t)(int)n != n || (int)n < 0) { - PyErr_SetString(PyExc_SystemError, "n_bytes too big to convert"); - return -1; - } - int little_endian = endianness; if (_resolve_endianness(&little_endian) < 0) { return -1; @@ -1146,13 +1141,13 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) memcpy(buffer, cv.b, n); } else { - for (size_t i = 0; i < n; ++i) { + for (Py_ssize_t i = 0; i < n; ++i) { ((unsigned char*)buffer)[n - i - 1] = cv.b[i]; } } #else if (little_endian) { - for (size_t i = 0; i < n; ++i) { + for (Py_ssize_t i = 0; i < n; ++i) { ((unsigned char*)buffer)[i] = cv.b[sizeof(cv.b) - i - 1]; } } @@ -1163,7 +1158,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) /* If we fit, return the requested number of bytes */ if (_fits_in_n_bits(cv.v, n * 8)) { - res = (int)n; + res = n; } } else { @@ -1175,20 +1170,20 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) } else { unsigned char *b = (unsigned char *)buffer; - for (size_t i = 0; i < n - sizeof(cv.b); ++i) { + for (Py_ssize_t i = 0; i < n - (int)sizeof(cv.b); ++i) { *b++ = fill; } - for (size_t i = sizeof(cv.b); i > 0; --i) { + for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) { *b++ = cv.b[i - 1]; } } #else if (little_endian) { unsigned char *b = (unsigned char *)buffer; - for (size_t i = sizeof(cv.b); i > 0; --i) { + for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) { *b++ = cv.b[i - 1]; } - for (size_t i = 0; i < n - sizeof(cv.b); ++i) { + for (Py_ssize_t i = 0; i < n - sizeof(cv.b); ++i) { *b++ = fill; } } @@ -1201,7 +1196,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) } else { if (n > 0) { - _PyLong_AsByteArray(v, buffer, n, little_endian, 1, 0); + _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0); } // More efficient calculation for number of bytes required? @@ -1210,7 +1205,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, size_t n, int endianness) * multiples of 8 to the next byte, but we add an implied bit for * the sign and it cancels out. */ size_t n_needed = (nb / 8) + 1; - res = (int)n_needed; + res = (Py_ssize_t)n_needed; if ((size_t)res != n_needed) { PyErr_SetString(PyExc_OverflowError, "value too large to convert");