Skip to content

gh-129005: Move bytearray to use bytes as a buffer #130563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/cpython/bytearrayobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ typedef struct {
char *ob_bytes; /* Physical backing buffer */
char *ob_start; /* Logical start inside ob_bytes */
Py_ssize_t ob_exports; /* How many buffer exports */
PyObject *ob_bytes_head; /* bytes enabling zero-copy detach. */
} PyByteArrayObject;

PyAPI_DATA(char) _PyByteArray_empty_string[];
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1547,7 +1547,7 @@ def test_objecttypes(self):
samples = [b'', b'u'*100000]
for sample in samples:
x = bytearray(sample)
check(x, vsize('n2Pi') + x.__alloc__())
check(x, vsize('n2PiP') + x.__alloc__())
# bytearray_iterator
check(iter(bytearray()), size('nP'))
# bytes
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Move :class:`bytearray` to use a buffer convertible to :class:`bytes` without copying.
101 changes: 62 additions & 39 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ class bytearray "PyByteArrayObject *" "&PyByteArray_Type"
char _PyByteArray_empty_string[] = "";

/* Helpers */
static void
bytearray_set_bytes(PyByteArrayObject *self, PyObject *bytes, Py_ssize_t size)
{
/* Always point to a valid bytes object */
assert(bytes);
assert(PyBytes_CheckExact(bytes));
self->ob_bytes_head = bytes;
self->ob_bytes = self->ob_start = PyBytes_AS_STRING(bytes);

Py_ssize_t alloc = PyBytes_GET_SIZE(bytes);
alloc = (alloc == 0 ? alloc : alloc + 1);
FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, alloc);
Py_SET_SIZE(self, size);
}

static int
_getbytevalue(PyObject* arg, int *value)
Expand Down Expand Up @@ -127,7 +141,6 @@ PyObject *
PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
{
PyByteArrayObject *new;
Py_ssize_t alloc;

if (size < 0) {
PyErr_SetString(PyExc_SystemError,
Expand All @@ -141,27 +154,17 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
}

new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
if (new == NULL)
if (new == NULL) {
return NULL;

if (size == 0) {
new->ob_bytes = NULL;
alloc = 0;
}
else {
alloc = size + 1;
new->ob_bytes = PyMem_Malloc(alloc);
if (new->ob_bytes == NULL) {
Py_DECREF(new);
return PyErr_NoMemory();
}
if (bytes != NULL && size > 0)
memcpy(new->ob_bytes, bytes, size);
new->ob_bytes[size] = '\0'; /* Trailing null byte */

PyObject *buffer = PyBytes_FromStringAndSize(bytes, size);
if (buffer == NULL) {
Py_DECREF(new);
return NULL;
}
Py_SET_SIZE(new, size);
new->ob_alloc = alloc;
new->ob_start = new->ob_bytes;

bytearray_set_bytes(new, buffer, size);
new->ob_exports = 0;

return (PyObject *)new;
Expand Down Expand Up @@ -189,7 +192,6 @@ static int
bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size)
{
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
void *sval;
PyByteArrayObject *obj = ((PyByteArrayObject *)self);
/* All computations are done unsigned to avoid integer overflows
(see issue #22335). */
Expand Down Expand Up @@ -245,26 +247,27 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size)
}

if (logical_offset > 0) {
sval = PyMem_Malloc(alloc);
if (sval == NULL) {
PyErr_NoMemory();
/* Make a new non-offset buffer */
PyObject *new = PyBytes_FromStringAndSize(NULL, alloc - 1);
if (new == NULL) {
return -1;
}
memcpy(sval, PyByteArray_AS_STRING(self),
memcpy(PyBytes_AS_STRING(new), PyByteArray_AS_STRING(self),
Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self)));
PyMem_Free(obj->ob_bytes);
Py_DECREF(obj->ob_bytes_head);
bytearray_set_bytes(obj, new, size);
obj->ob_bytes[size] = '\0'; /* Trailing null byte */

return 0;
}
else {
sval = PyMem_Realloc(obj->ob_bytes, alloc);
if (sval == NULL) {
PyErr_NoMemory();
return -1;
}

/* Too small, grow allocation */
if (_PyBytes_Resize(&obj->ob_bytes_head, alloc - 1) < 0) {
Copy link
Contributor Author

@cmaloney cmaloney Feb 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

behavior change: _PyBytes_Resize behaves differently than PyMem_Realloc here. _PyBytes_Resize can change the location of the buffer. That means the address returned from PyByteArray_AS_STRING may change on any resize. Previously it changed only when went from empty <-> non-empty bytearray (empty points to a constant storage).

https://docs.python.org/3/c-api/memory.html#c.PyMem_Realloc

bytearray_set_bytes(obj, PyBytes_FromStringAndSize(NULL, 0), 0);
return -1;
}

obj->ob_bytes = obj->ob_start = sval;
Py_SET_SIZE(self, size);
FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, alloc);
bytearray_set_bytes(obj, obj->ob_bytes_head, size);
obj->ob_bytes[size] = '\0'; /* Trailing null byte */

return 0;
Expand Down Expand Up @@ -870,6 +873,19 @@ bytearray_ass_subscript(PyObject *op, PyObject *index, PyObject *values)
return ret;
}

static PyObject *
bytearray_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds)
{
PyObject *obj = subtype->tp_alloc(subtype, 0);
if (obj == NULL) {
return NULL;
}

PyByteArrayObject *self = _PyByteArray_CAST(obj);
bytearray_set_bytes(self, PyBytes_FromStringAndSize(NULL, 0), 0);
return obj;
}

/*[clinic input]
bytearray.__init__

Expand Down Expand Up @@ -1233,9 +1249,7 @@ bytearray_dealloc(PyObject *op)
"deallocated bytearray object has exported buffers");
PyErr_Print();
}
if (self->ob_bytes != 0) {
PyMem_Free(self->ob_bytes);
}
Py_XDECREF(self->ob_bytes_head);
Py_TYPE(self)->tp_free((PyObject *)self);
}

Expand Down Expand Up @@ -2452,7 +2466,16 @@ static PyObject *
bytearray_alloc(PyObject *op, PyObject *Py_UNUSED(ignored))
{
PyByteArrayObject *self = _PyByteArray_CAST(op);
return PyLong_FromSsize_t(FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc));

Py_ssize_t size = PyBytes_GET_SIZE(self->ob_bytes_head);
Py_ssize_t alloc = (size == 0 ? size : size + 1);

#ifndef NDEBUG
/* Validate redundant information is redundant */
Py_ssize_t ob_alloc = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc);
assert(alloc == ob_alloc);
#endif
return PyLong_FromSsize_t(alloc);
}

/*[clinic input]
Expand Down Expand Up @@ -2820,7 +2843,7 @@ PyTypeObject PyByteArray_Type = {
0, /* tp_dictoffset */
(initproc)bytearray___init__, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
PyType_GenericNew, /* tp_new */
bytearray_new, /* tp_new */
PyObject_Free, /* tp_free */
.tp_version_tag = _Py_TYPE_VERSION_BYTEARRAY,
};
Expand Down
Loading