From 788c1889264c6962790807411d1343f43fea9bee Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 4 Aug 2023 00:56:10 -0500 Subject: [PATCH 1/7] use reconstructor function for pickling, see also issue #206 --- bitarray/__init__.py | 1 + bitarray/_bitarray.c | 80 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/bitarray/__init__.py b/bitarray/__init__.py index ff21ecbd..f7640594 100644 --- a/bitarray/__init__.py +++ b/bitarray/__init__.py @@ -12,6 +12,7 @@ from __future__ import absolute_import from bitarray._bitarray import (bitarray, decodetree, _sysinfo, + _bitarray_reconstructor, get_default_endian, _set_default_endian, __version__) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 09d6f7e8..7b061b6c 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1163,8 +1163,20 @@ static PyObject * bitarray_reduce(bitarrayobject *self) { const Py_ssize_t nbytes = Py_SIZE(self); - PyObject *dict, *repr = NULL, *result = NULL; - char *str; + static PyObject *reconstructor = NULL; + PyObject *dict, *bytes, *result = NULL; + + if (reconstructor == NULL) { + PyObject *bitarray_module; + + if ((bitarray_module = PyImport_ImportModule("bitarray")) == NULL) + return NULL; + reconstructor = PyObject_GetAttrString(bitarray_module, + "_bitarray_reconstructor"); + Py_DECREF(bitarray_module); + if (reconstructor == NULL) + return NULL; + } dict = PyObject_GetAttrString((PyObject *) self, "__dict__"); if (dict == NULL) { @@ -1173,21 +1185,17 @@ bitarray_reduce(bitarrayobject *self) Py_INCREF(dict); } - repr = PyBytes_FromStringAndSize(NULL, nbytes + 1); - if (repr == NULL) - goto error; - - str = PyBytes_AsString(repr); - /* first byte contains the number of pad bits */ - *str = (char) set_padbits(self); - /* remaining bytes contain buffer */ - memcpy(str + 1, self->ob_item, (size_t) nbytes); + bytes = PyBytes_FromStringAndSize(NULL, nbytes); + if (bytes == NULL) { + Py_DECREF(dict); + return NULL; + } + memcpy(PyBytes_AsString(bytes), self->ob_item, (size_t) nbytes); - result = Py_BuildValue("O(Os)O", Py_TYPE(self), - repr, ENDIAN_STR(self->endian), dict); - error: + result = Py_BuildValue( + "O(OnOsi)O", reconstructor, Py_TYPE(self), self->nbits, bytes, + ENDIAN_STR(self->endian), self->readonly, dict); Py_DECREF(dict); - Py_XDECREF(repr); return result; } @@ -4001,6 +4009,45 @@ static PyTypeObject Bitarray_Type = { /***************************** Module functions ***************************/ +static PyObject * +reconstructor(PyObject *module, PyObject *args) +{ + PyTypeObject *type; + Py_ssize_t nbits, nbytes; + PyObject *res, *bytes; + char *endian_str; + int endian, readonly; + + if (!PyArg_ParseTuple(args, "OnOsi:_bitarray_reconstructor", + &type, &nbits, &bytes, &endian_str, &readonly)) + return NULL; + + if ((endian = endian_from_string(endian_str)) < 0) + return NULL; + + if (!PyBytes_Check(bytes)) + return PyErr_Format(PyExc_TypeError, "bytes expected, got '%s'", + Py_TYPE(bytes)->tp_name); + + nbytes = PyBytes_GET_SIZE(bytes); + if (nbytes != BYTES(nbits)) + return PyErr_Format(PyExc_ValueError, + "size mismatch: %zd != %zd (%zd bits)", + nbytes, BYTES(nbits), nbits); + + res = newbitarrayobject(type, nbits, endian); + if (res == NULL) + return NULL; + memcpy(((bitarrayobject *) res)->ob_item, PyBytes_AS_STRING(bytes), + (size_t) nbytes); + if (readonly) + ((bitarrayobject *) res)->readonly = 1; + return res; +} + +PyDoc_STRVAR(reconstructor_doc, "Internal. Used for pickling support."); + + static PyObject * get_default_endian(PyObject *module) { @@ -4079,6 +4126,9 @@ Return tuple containing:\n\ static PyMethodDef module_functions[] = { + {"_bitarray_reconstructor", + (PyCFunction) reconstructor, METH_VARARGS, + reconstructor_doc}, {"get_default_endian", (PyCFunction) get_default_endian, METH_NOARGS, get_default_endian_doc}, {"_set_default_endian", (PyCFunction) set_default_endian, METH_VARARGS, From cde434b769682f60cb057f40c41efd78aac261d8 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 4 Aug 2023 01:39:35 -0500 Subject: [PATCH 2/7] fix reference count bugs --- bitarray/_bitarray.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 7b061b6c..17410f14 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1164,7 +1164,7 @@ bitarray_reduce(bitarrayobject *self) { const Py_ssize_t nbytes = Py_SIZE(self); static PyObject *reconstructor = NULL; - PyObject *dict, *bytes, *result = NULL; + PyObject *dict, *bytes, *result; if (reconstructor == NULL) { PyObject *bitarray_module; @@ -1196,6 +1196,7 @@ bitarray_reduce(bitarrayobject *self) "O(OnOsi)O", reconstructor, Py_TYPE(self), self->nbits, bytes, ENDIAN_STR(self->endian), self->readonly, dict); Py_DECREF(dict); + Py_DECREF(bytes); return result; } @@ -4040,8 +4041,11 @@ reconstructor(PyObject *module, PyObject *args) return NULL; memcpy(((bitarrayobject *) res)->ob_item, PyBytes_AS_STRING(bytes), (size_t) nbytes); - if (readonly) - ((bitarrayobject *) res)->readonly = 1; + if (readonly) { + PyObject *ret; + ret = bitarray_freeze((bitarrayobject *) res); + Py_DECREF(ret); /* drop None */ + } return res; } From fe283ad1932c07b6d67f0f9dcee6f29435378c4d Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 4 Aug 2023 03:10:48 -0500 Subject: [PATCH 3/7] simplify new code --- bitarray/_bitarray.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 17410f14..50841e5c 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1192,15 +1192,15 @@ bitarray_reduce(bitarrayobject *self) } memcpy(PyBytes_AsString(bytes), self->ob_item, (size_t) nbytes); - result = Py_BuildValue( - "O(OnOsi)O", reconstructor, Py_TYPE(self), self->nbits, bytes, - ENDIAN_STR(self->endian), self->readonly, dict); + result = Py_BuildValue("O(OOsii)O", reconstructor, Py_TYPE(self), bytes, + ENDIAN_STR(self->endian), PADBITS(self), + self->readonly, dict); Py_DECREF(dict); Py_DECREF(bytes); return result; } -PyDoc_STRVAR(reduce_doc, "state information for pickling"); +PyDoc_STRVAR(reduce_doc, "Internal. Used for pickling support."); static PyObject * @@ -4014,13 +4014,13 @@ static PyObject * reconstructor(PyObject *module, PyObject *args) { PyTypeObject *type; - Py_ssize_t nbits, nbytes; + Py_ssize_t nbytes; PyObject *res, *bytes; char *endian_str; - int endian, readonly; + int endian, padbits, readonly; - if (!PyArg_ParseTuple(args, "OnOsi:_bitarray_reconstructor", - &type, &nbits, &bytes, &endian_str, &readonly)) + if (!PyArg_ParseTuple(args, "OOsii:_bitarray_reconstructor", + &type, &bytes, &endian_str, &padbits, &readonly)) return NULL; if ((endian = endian_from_string(endian_str)) < 0) @@ -4030,27 +4030,23 @@ reconstructor(PyObject *module, PyObject *args) return PyErr_Format(PyExc_TypeError, "bytes expected, got '%s'", Py_TYPE(bytes)->tp_name); - nbytes = PyBytes_GET_SIZE(bytes); - if (nbytes != BYTES(nbits)) - return PyErr_Format(PyExc_ValueError, - "size mismatch: %zd != %zd (%zd bits)", - nbytes, BYTES(nbits), nbits); + if (padbits < 0 || padbits >= 8) + return PyErr_Format(PyExc_ValueError, "padbits = %d", padbits); - res = newbitarrayobject(type, nbits, endian); + nbytes = PyBytes_GET_SIZE(bytes); + res = newbitarrayobject(type, 8 * nbytes - padbits, endian); if (res == NULL) return NULL; - memcpy(((bitarrayobject *) res)->ob_item, PyBytes_AS_STRING(bytes), - (size_t) nbytes); +#define rr ((bitarrayobject *) res) + memcpy(rr->ob_item, PyBytes_AS_STRING(bytes), (size_t) nbytes); if (readonly) { - PyObject *ret; - ret = bitarray_freeze((bitarrayobject *) res); - Py_DECREF(ret); /* drop None */ + set_padbits(rr); + rr->readonly = 1; } +#undef rr return res; } -PyDoc_STRVAR(reconstructor_doc, "Internal. Used for pickling support."); - static PyObject * get_default_endian(PyObject *module) @@ -4132,7 +4128,7 @@ Return tuple containing:\n\ static PyMethodDef module_functions[] = { {"_bitarray_reconstructor", (PyCFunction) reconstructor, METH_VARARGS, - reconstructor_doc}, + reduce_doc}, {"get_default_endian", (PyCFunction) get_default_endian, METH_NOARGS, get_default_endian_doc}, {"_set_default_endian", (PyCFunction) set_default_endian, METH_VARARGS, From 8affc0354d404c81d55e72716e0c731c8fe8ebc3 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 4 Aug 2023 07:58:46 -0500 Subject: [PATCH 4/7] simplify bitarray_reduce() --- bitarray/_bitarray.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 50841e5c..a6d21076 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1162,7 +1162,6 @@ When the optional `index` is given, only invert the single bit at index."); static PyObject * bitarray_reduce(bitarrayobject *self) { - const Py_ssize_t nbytes = Py_SIZE(self); static PyObject *reconstructor = NULL; PyObject *dict, *bytes, *result; @@ -1185,12 +1184,11 @@ bitarray_reduce(bitarrayobject *self) Py_INCREF(dict); } - bytes = PyBytes_FromStringAndSize(NULL, nbytes); + bytes = PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); if (bytes == NULL) { Py_DECREF(dict); return NULL; } - memcpy(PyBytes_AsString(bytes), self->ob_item, (size_t) nbytes); result = Py_BuildValue("O(OOsii)O", reconstructor, Py_TYPE(self), bytes, ENDIAN_STR(self->endian), PADBITS(self), From 0f7006ddcbc685a95ee01de983113e3a25e757e6 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Fri, 4 Aug 2023 17:11:47 -0500 Subject: [PATCH 5/7] add missing set_padbits() - update comment --- bitarray/_bitarray.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index a6d21076..861ff679 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1184,6 +1184,7 @@ bitarray_reduce(bitarrayobject *self) Py_INCREF(dict); } + set_padbits(self); bytes = PyBytes_FromStringAndSize(self->ob_item, Py_SIZE(self)); if (bytes == NULL) { Py_DECREF(dict); @@ -3645,7 +3646,7 @@ bitarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (PyIndex_Check(initial)) return newbitarray_from_index(type, initial, endian); - /* bytes (for pickling) - must have head byte (0x00 .. 0x07) */ + /* bytes (for pickling) - to be removed, see #206 */ if (PyBytes_Check(initial) && PyBytes_GET_SIZE(initial) > 0) { char head = *PyBytes_AS_STRING(initial); if ((head & 0xf8) == 0) From 18b6dd87e868b4e0bb94a242c8210d25ad7c5da1 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 5 Aug 2023 08:49:23 -0500 Subject: [PATCH 6/7] add new pickle test data file --- bitarray/{test_data.pickle => test_150.pickle} | Bin bitarray/test_281.pickle | Bin 0 -> 442 bytes bitarray/test_bitarray.py | 13 +++++++++---- 3 files changed, 9 insertions(+), 4 deletions(-) rename bitarray/{test_data.pickle => test_150.pickle} (100%) create mode 100644 bitarray/test_281.pickle diff --git a/bitarray/test_data.pickle b/bitarray/test_150.pickle similarity index 100% rename from bitarray/test_data.pickle rename to bitarray/test_150.pickle diff --git a/bitarray/test_281.pickle b/bitarray/test_281.pickle new file mode 100644 index 0000000000000000000000000000000000000000..8827214c95e862c3fa91667a92fd291a7de07b68 GIT binary patch literal 442 zcmY+A$xg#C6h)nZ7R+4cd8T2iI{W}qmf3lgU1_4EiBvVZ#$AA9sh^JN3u)NB#qqiK zdhTd2FSHqBX&+V`~Ha9Bx?2t5`!(Ks#90nP}e3g;DlK^rawt~9R8 y4L#ia(mY#QZ;jPdy%V^noqaqknDS#D>uuY#%M*AaxQk~J{u}i|&R2oAS?33Z(r_*S literal 0 HcmV?d00001 diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index e2089029..b14b17ed 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -3709,10 +3709,8 @@ def test_pickle(self): for key in d1.keys(): self.assertEQUAL(d1[key], d2[key]) - @skipIf(sys.version_info[0] == 2) - def test_pickle_load(self): - # the test data file was created using bitarray 1.5.0 / Python 3.5.5 - path = os.path.join(os.path.dirname(__file__), 'test_data.pickle') + def check_file(self, fn): + path = os.path.join(os.path.dirname(__file__), fn) with open(path, 'rb') as fi: d = pickle.load(fi) @@ -3740,6 +3738,13 @@ def test_pickle_load(self): self.assertTrue(f.readonly) self.check_obj(f) + @skipIf(sys.version_info[0] == 2) + def test_pickle_load(self): + # test data file was created using bitarray 1.5.0 / Python 3.5.5 + self.check_file('test_150.pickle') + # using bitarray 2.8.1 / Python 3.5.5 (_bitarray_reconstructor) + self.check_file('test_281.pickle') + @skipIf(pyodide) # pyodide has no dbm module def test_shelve(self): if hasattr(sys, 'gettotalrefcount'): From e0fc94a5af3b624866553ecb3619111aed5a5ba2 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Sat, 5 Aug 2023 08:58:47 -0500 Subject: [PATCH 7/7] improve error message - update changelog --- CHANGE_LOG | 1 + bitarray/_bitarray.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGE_LOG b/CHANGE_LOG index b7e7338c..40ae65f3 100644 --- a/CHANGE_LOG +++ b/CHANGE_LOG @@ -1,5 +1,6 @@ 2023-XX-XX 2.8.1: ------------------- + * use reconstructor function for pickling, see #207 2023-07-22 2.8.0: diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 861ff679..1290f4c5 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -4022,15 +4022,16 @@ reconstructor(PyObject *module, PyObject *args) &type, &bytes, &endian_str, &padbits, &readonly)) return NULL; - if ((endian = endian_from_string(endian_str)) < 0) - return NULL; - if (!PyBytes_Check(bytes)) return PyErr_Format(PyExc_TypeError, "bytes expected, got '%s'", Py_TYPE(bytes)->tp_name); + if ((endian = endian_from_string(endian_str)) < 0) + return NULL; + if (padbits < 0 || padbits >= 8) - return PyErr_Format(PyExc_ValueError, "padbits = %d", padbits); + return PyErr_Format(PyExc_ValueError, + "padbits not in range(0, 8), got %d", padbits); nbytes = PyBytes_GET_SIZE(bytes); res = newbitarrayobject(type, 8 * nbytes - padbits, endian);