From 167f426407a1549f22d8f2aaf591d0d0dea775c3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 15 Sep 2021 19:03:46 -0600 Subject: [PATCH 1/8] hashtable -> ref_indices. --- Python/marshal.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 346384edea6180..f11f9e28148e00 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -86,7 +86,7 @@ typedef struct { char *ptr; const char *end; char *buf; - _Py_hashtable_t *hashtable; + _Py_hashtable_t *ref_indices; int version; } WFILE; @@ -296,14 +296,14 @@ w_ref(PyObject *v, char *flag, WFILE *p) _Py_hashtable_entry_t *entry; int w; - if (p->version < 3 || p->hashtable == NULL) + if (p->version < 3 || p->ref_indices == NULL) return 0; /* not writing object references */ /* if it has only one reference, it definitely isn't shared */ if (Py_REFCNT(v) == 1) return 0; - entry = _Py_hashtable_get_entry(p->hashtable, v); + entry = _Py_hashtable_get_entry(p->ref_indices, v); if (entry != NULL) { /* write the reference index to the stream */ w = (int)(uintptr_t)entry->value; @@ -313,7 +313,7 @@ w_ref(PyObject *v, char *flag, WFILE *p) w_long(w, p); return 1; } else { - size_t s = p->hashtable->nentries; + size_t s = p->ref_indices->nentries; /* we don't support long indices */ if (s >= 0x7fffffff) { PyErr_SetString(PyExc_ValueError, "too many objects"); @@ -321,7 +321,7 @@ w_ref(PyObject *v, char *flag, WFILE *p) } w = (int)s; Py_INCREF(v); - if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) { + if (_Py_hashtable_set(p->ref_indices, v, (void *)(uintptr_t)w) < 0) { Py_DECREF(v); goto err; } @@ -594,10 +594,10 @@ static int w_init_refs(WFILE *wf, int version) { if (version >= 3) { - wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr, - _Py_hashtable_compare_direct, - w_decref_entry, NULL, NULL); - if (wf->hashtable == NULL) { + wf->ref_indices = _Py_hashtable_new_full(_Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + w_decref_entry, NULL, NULL); + if (wf->ref_indices == NULL) { PyErr_NoMemory(); return -1; } @@ -608,8 +608,8 @@ w_init_refs(WFILE *wf, int version) static void w_clear_refs(WFILE *wf) { - if (wf->hashtable != NULL) { - _Py_hashtable_destroy(wf->hashtable); + if (wf->ref_indices != NULL) { + _Py_hashtable_destroy(wf->ref_indices); } } From 549db475404bea8590eb8652a338f8f4cfc41a47 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 15 Sep 2021 22:25:38 -0600 Subject: [PATCH 2/8] Use a second pass to exclude refs for objects used only once. --- Python/marshal.c | 83 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index f11f9e28148e00..091ea927327f0a 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -14,6 +14,7 @@ #include "marshal.h" #include "pycore_hashtable.h" #include "pycore_code.h" // _PyCode_New() +#include /*[clinic input] module marshal @@ -87,6 +88,7 @@ typedef struct { const char *end; char *buf; _Py_hashtable_t *ref_indices; + ssize_t refs_numobjects; int version; } WFILE; @@ -305,32 +307,63 @@ w_ref(PyObject *v, char *flag, WFILE *p) entry = _Py_hashtable_get_entry(p->ref_indices, v); if (entry != NULL) { - /* write the reference index to the stream */ w = (int)(uintptr_t)entry->value; - /* we don't store "long" indices in the dict */ - assert(0 <= w && w <= 0x7fffffff); - w_byte(TYPE_REF, p); - w_long(w, p); - return 1; - } else { - size_t s = p->ref_indices->nentries; + if (p->refs_numobjects < 0) { + /* It's an extra ref on the first pass. */ + w -= 1; + entry->value = (void *)(uintptr_t)w; + w_byte(TYPE_REF, p); + w_long(-1, p); + return 1; + } + else if (w >= 0) { + /* It's an extra ref on the second (or only) pass. */ + /* write the reference index to the stream */ + /* we don't store "long" indices in the dict */ + assert(w <= 0x7fffffff); + w_byte(TYPE_REF, p); + w_long(w, p); + return 1; + } + else if (w == -1) { + /* It's the first ref on the second pass, with no extra refs. + So we treat it as though it has a refcount of 1. */ + return 0; + } + else { + /* It's the first ref on the second pass, with extra refs, */ + assert(w < 0); + entry->value = (void *)(uintptr_t)p->refs_numobjects; + p->refs_numobjects += 1; + *flag |= FLAG_REF; + return 0; + } + } + + /* It's the object's first ref. */ + if (p->refs_numobjects < 0) { + /* It's the first pass. */ + w = -1; + } + else { + /* It's the second (or only) pass. */ /* we don't support long indices */ - if (s >= 0x7fffffff) { + if (p->refs_numobjects >= 0x7fffffff) { PyErr_SetString(PyExc_ValueError, "too many objects"); - goto err; - } - w = (int)s; - Py_INCREF(v); - if (_Py_hashtable_set(p->ref_indices, v, (void *)(uintptr_t)w) < 0) { - Py_DECREF(v); - goto err; + p->error = WFERR_UNMARSHALLABLE; + return 1; } - *flag |= FLAG_REF; - return 0; + w = (int)p->refs_numobjects; + p->refs_numobjects += 1; + } + Py_INCREF(v); + if (_Py_hashtable_set(p->ref_indices, v, (void *)(uintptr_t)w) < 0) { + Py_DECREF(v); + p->error = WFERR_UNMARSHALLABLE; + return 1; } -err: - p->error = WFERR_UNMARSHALLABLE; - return 1; + *flag |= FLAG_REF; + return 0; } static void @@ -1665,7 +1698,15 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) Py_DECREF(wf.str); return NULL; } + /* Make a first pass tracking which objects have multiple refs. */ + wf.refs_numobjects = -1; w_object(x, &wf); + /* Make a second pass de-duplicating the multiple refs. */ + wf.refs_numobjects = 0; + wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str); + wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str); + w_object(x, &wf); + w_clear_refs(&wf); if (wf.str != NULL) { const char *base = PyBytes_AS_STRING(wf.str); From 725001b46c3938cd8db5283a337ef9d466d7d8b9 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 15 Sep 2021 22:33:39 -0600 Subject: [PATCH 3/8] Add a NEWS entry. --- .../Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst new file mode 100644 index 00000000000000..2688cab322cddd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst @@ -0,0 +1,2 @@ +Make marshal output the same whether or not it's a debug build. The fix has +a side-effect of making un-marshaling a little faster. From 890b2f93dc1b02327b13a49355a3edd6e70a6885 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Sep 2021 13:07:53 -0600 Subject: [PATCH 4/8] ref_indices -> hashtable. --- Python/marshal.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 091ea927327f0a..c058e3addbaed9 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -87,7 +87,7 @@ typedef struct { char *ptr; const char *end; char *buf; - _Py_hashtable_t *ref_indices; + _Py_hashtable_t *hashtable; ssize_t refs_numobjects; int version; } WFILE; @@ -298,14 +298,14 @@ w_ref(PyObject *v, char *flag, WFILE *p) _Py_hashtable_entry_t *entry; int w; - if (p->version < 3 || p->ref_indices == NULL) + if (p->version < 3 || p->hashtable == NULL) return 0; /* not writing object references */ /* if it has only one reference, it definitely isn't shared */ if (Py_REFCNT(v) == 1) return 0; - entry = _Py_hashtable_get_entry(p->ref_indices, v); + entry = _Py_hashtable_get_entry(p->hashtable, v); if (entry != NULL) { w = (int)(uintptr_t)entry->value; if (p->refs_numobjects < 0) { @@ -357,7 +357,7 @@ w_ref(PyObject *v, char *flag, WFILE *p) p->refs_numobjects += 1; } Py_INCREF(v); - if (_Py_hashtable_set(p->ref_indices, v, (void *)(uintptr_t)w) < 0) { + if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) { Py_DECREF(v); p->error = WFERR_UNMARSHALLABLE; return 1; @@ -627,10 +627,10 @@ static int w_init_refs(WFILE *wf, int version) { if (version >= 3) { - wf->ref_indices = _Py_hashtable_new_full(_Py_hashtable_hash_ptr, - _Py_hashtable_compare_direct, - w_decref_entry, NULL, NULL); - if (wf->ref_indices == NULL) { + wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + w_decref_entry, NULL, NULL); + if (wf->hashtable == NULL) { PyErr_NoMemory(); return -1; } @@ -641,8 +641,8 @@ w_init_refs(WFILE *wf, int version) static void w_clear_refs(WFILE *wf) { - if (wf->ref_indices != NULL) { - _Py_hashtable_destroy(wf->ref_indices); + if (wf->hashtable != NULL) { + _Py_hashtable_destroy(wf->hashtable); } } From 2aa38f63ee11510ab59c9adf6b5e6e380905e3a4 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Sep 2021 13:09:57 -0600 Subject: [PATCH 5/8] Use Py_ssize_t. --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/marshal.c b/Python/marshal.c index c058e3addbaed9..78105f430fb712 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -88,7 +88,7 @@ typedef struct { const char *end; char *buf; _Py_hashtable_t *hashtable; - ssize_t refs_numobjects; + Py_ssize_t refs_numobjects; int version; } WFILE; From b506b454511d86a34e61879b34e7b3d69c144400 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Sep 2021 13:26:19 -0600 Subject: [PATCH 6/8] Make the two-pass solution optional. --- Python/clinic/marshal.c.h | 34 +++++++++++++++++++++++++--------- Python/marshal.c | 35 +++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/Python/clinic/marshal.c.h b/Python/clinic/marshal.c.h index f80d5ef31f29c7..f4794c248a82f1 100644 --- a/Python/clinic/marshal.c.h +++ b/Python/clinic/marshal.c.h @@ -73,7 +73,7 @@ PyDoc_STRVAR(marshal_load__doc__, {"load", (PyCFunction)marshal_load, METH_O, marshal_load__doc__}, PyDoc_STRVAR(marshal_dumps__doc__, -"dumps($module, value, version=version, /)\n" +"dumps($module, value, version=version, /, *, stable=True)\n" "--\n" "\n" "Return the bytes object that would be written to a file by dump(value, file).\n" @@ -87,31 +87,47 @@ PyDoc_STRVAR(marshal_dumps__doc__, "unsupported type."); #define MARSHAL_DUMPS_METHODDEF \ - {"dumps", (PyCFunction)(void(*)(void))marshal_dumps, METH_FASTCALL, marshal_dumps__doc__}, + {"dumps", (PyCFunction)(void(*)(void))marshal_dumps, METH_FASTCALL|METH_KEYWORDS, marshal_dumps__doc__}, static PyObject * -marshal_dumps_impl(PyObject *module, PyObject *value, int version); +marshal_dumps_impl(PyObject *module, PyObject *value, int version, + int stable); static PyObject * -marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"", "", "stable", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "dumps", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *value; int version = Py_MARSHAL_VERSION; + int stable = 1; - if (!_PyArg_CheckPositional("dumps", nargs, 1, 2)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { goto exit; } value = args[0]; if (nargs < 2) { - goto skip_optional; + goto skip_optional_posonly; } + noptargs--; version = _PyLong_AsInt(args[1]); if (version == -1 && PyErr_Occurred()) { goto exit; } -skip_optional: - return_value = marshal_dumps_impl(module, value, version); +skip_optional_posonly: + if (!noptargs) { + goto skip_optional_kwonly; + } + stable = PyObject_IsTrue(args[2]); + if (stable < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = marshal_dumps_impl(module, value, version, stable); exit: return return_value; @@ -155,4 +171,4 @@ marshal_loads(PyObject *module, PyObject *arg) return return_value; } -/*[clinic end generated code: output=68b78f38bfe0c06d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2e77fd1eb7a09094 input=a9049054013a1b77]*/ diff --git a/Python/marshal.c b/Python/marshal.c index 78105f430fb712..f334c40fb28f33 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1678,8 +1678,8 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) return result; } -PyObject * -PyMarshal_WriteObjectToString(PyObject *x, int version) +static PyObject * +write_object_to_string(PyObject *x, int version, bool stable) { WFILE wf; @@ -1698,13 +1698,15 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) Py_DECREF(wf.str); return NULL; } - /* Make a first pass tracking which objects have multiple refs. */ - wf.refs_numobjects = -1; - w_object(x, &wf); - /* Make a second pass de-duplicating the multiple refs. */ - wf.refs_numobjects = 0; - wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str); - wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str); + if (stable) { + /* Make a first pass tracking which objects have multiple refs. */ + wf.refs_numobjects = -1; + w_object(x, &wf); + /* Make a second pass de-duplicating the multiple refs. */ + wf.refs_numobjects = 0; + wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str); + wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str); + } w_object(x, &wf); w_clear_refs(&wf); @@ -1726,6 +1728,12 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) return wf.str; } +PyObject * +PyMarshal_WriteObjectToString(PyObject *x, int version) +{ + return write_object_to_string(x, version, 0); +} + /* And an interface for Python programs... */ /*[clinic input] marshal.dump @@ -1830,6 +1838,8 @@ marshal.dumps version: int(c_default="Py_MARSHAL_VERSION") = version Indicates the data format that dumps should use. / + * + stable: bool = True Return the bytes object that would be written to a file by dump(value, file). @@ -1838,10 +1848,11 @@ unsupported type. [clinic start generated code]*/ static PyObject * -marshal_dumps_impl(PyObject *module, PyObject *value, int version) -/*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/ +marshal_dumps_impl(PyObject *module, PyObject *value, int version, + int stable) +/*[clinic end generated code: output=87276039e6c75faf input=b460e8ba0aa325ac]*/ { - return PyMarshal_WriteObjectToString(value, version); + return write_object_to_string(value, version, stable); } /*[clinic input] From 8f9ba8d384c3e4ea5bdc57e8dfb9c368ae913195 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Sep 2021 13:33:40 -0600 Subject: [PATCH 7/8] Just use int for bool. --- Python/marshal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index f334c40fb28f33..07786135f6a4af 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -14,7 +14,6 @@ #include "marshal.h" #include "pycore_hashtable.h" #include "pycore_code.h" // _PyCode_New() -#include /*[clinic input] module marshal @@ -1679,7 +1678,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) } static PyObject * -write_object_to_string(PyObject *x, int version, bool stable) +write_object_to_string(PyObject *x, int version, int stable) { WFILE wf; From 27ad07589aa94200959503b566c95da434e254d2 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Sep 2021 16:12:57 -0600 Subject: [PATCH 8/8] Change the BPO number. --- ...-45186.O-gZHM.rst => 2021-09-15-22-33-31.bpo-34093.O-gZHM.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Core and Builtins/{2021-09-15-22-33-31.bpo-45186.O-gZHM.rst => 2021-09-15-22-33-31.bpo-34093.O-gZHM.rst} (100%) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-34093.O-gZHM.rst similarity index 100% rename from Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-45186.O-gZHM.rst rename to Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-34093.O-gZHM.rst