diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-34093.O-gZHM.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-34093.O-gZHM.rst new file mode 100644 index 00000000000000..2688cab322cddd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-09-15-22-33-31.bpo-34093.O-gZHM.rst @@ -0,0 +1,2 @@ +Make marshal output the same whether or not it's a debug build. The fix has +a side-effect of making un-marshaling a little faster. diff --git a/Python/clinic/marshal.c.h b/Python/clinic/marshal.c.h index f80d5ef31f29c7..f4794c248a82f1 100644 --- a/Python/clinic/marshal.c.h +++ b/Python/clinic/marshal.c.h @@ -73,7 +73,7 @@ PyDoc_STRVAR(marshal_load__doc__, {"load", (PyCFunction)marshal_load, METH_O, marshal_load__doc__}, PyDoc_STRVAR(marshal_dumps__doc__, -"dumps($module, value, version=version, /)\n" +"dumps($module, value, version=version, /, *, stable=True)\n" "--\n" "\n" "Return the bytes object that would be written to a file by dump(value, file).\n" @@ -87,31 +87,47 @@ PyDoc_STRVAR(marshal_dumps__doc__, "unsupported type."); #define MARSHAL_DUMPS_METHODDEF \ - {"dumps", (PyCFunction)(void(*)(void))marshal_dumps, METH_FASTCALL, marshal_dumps__doc__}, + {"dumps", (PyCFunction)(void(*)(void))marshal_dumps, METH_FASTCALL|METH_KEYWORDS, marshal_dumps__doc__}, static PyObject * -marshal_dumps_impl(PyObject *module, PyObject *value, int version); +marshal_dumps_impl(PyObject *module, PyObject *value, int version, + int stable); static PyObject * -marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"", "", "stable", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "dumps", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *value; int version = Py_MARSHAL_VERSION; + int stable = 1; - if (!_PyArg_CheckPositional("dumps", nargs, 1, 2)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { goto exit; } value = args[0]; if (nargs < 2) { - goto skip_optional; + goto skip_optional_posonly; } + noptargs--; version = _PyLong_AsInt(args[1]); if (version == -1 && PyErr_Occurred()) { goto exit; } -skip_optional: - return_value = marshal_dumps_impl(module, value, version); +skip_optional_posonly: + if (!noptargs) { + goto skip_optional_kwonly; + } + stable = PyObject_IsTrue(args[2]); + if (stable < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = marshal_dumps_impl(module, value, version, stable); exit: return return_value; @@ -155,4 +171,4 @@ marshal_loads(PyObject *module, PyObject *arg) return return_value; } -/*[clinic end generated code: output=68b78f38bfe0c06d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2e77fd1eb7a09094 input=a9049054013a1b77]*/ diff --git a/Python/marshal.c b/Python/marshal.c index 346384edea6180..07786135f6a4af 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -87,6 +87,7 @@ typedef struct { const char *end; char *buf; _Py_hashtable_t *hashtable; + Py_ssize_t refs_numobjects; int version; } WFILE; @@ -305,32 +306,63 @@ w_ref(PyObject *v, char *flag, WFILE *p) entry = _Py_hashtable_get_entry(p->hashtable, v); if (entry != NULL) { - /* write the reference index to the stream */ w = (int)(uintptr_t)entry->value; - /* we don't store "long" indices in the dict */ - assert(0 <= w && w <= 0x7fffffff); - w_byte(TYPE_REF, p); - w_long(w, p); - return 1; - } else { - size_t s = p->hashtable->nentries; + if (p->refs_numobjects < 0) { + /* It's an extra ref on the first pass. 
*/ + w -= 1; + entry->value = (void *)(uintptr_t)w; + w_byte(TYPE_REF, p); + w_long(-1, p); + return 1; + } + else if (w >= 0) { + /* It's an extra ref on the second (or only) pass. */ + /* write the reference index to the stream */ + /* we don't store "long" indices in the dict */ + assert(w <= 0x7fffffff); + w_byte(TYPE_REF, p); + w_long(w, p); + return 1; + } + else if (w == -1) { + /* It's the first ref on the second pass, with no extra refs. + So we treat it as though it has a refcount of 1. */ + return 0; + } + else { + /* It's the first ref on the second pass, with extra refs. */ + assert(w < 0); + entry->value = (void *)(uintptr_t)p->refs_numobjects; + p->refs_numobjects += 1; + *flag |= FLAG_REF; + return 0; + } + } + + /* It's the object's first ref. */ + if (p->refs_numobjects < 0) { + /* It's the first pass. */ + w = -1; + } + else { + /* It's the second (or only) pass. */ /* we don't support long indices */ - if (s >= 0x7fffffff) { + if (p->refs_numobjects >= 0x7fffffff) { PyErr_SetString(PyExc_ValueError, "too many objects"); - goto err; - } - w = (int)s; - Py_INCREF(v); - if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) { - Py_DECREF(v); - goto err; + p->error = WFERR_UNMARSHALLABLE; + return 1; } - *flag |= FLAG_REF; - return 0; + w = (int)p->refs_numobjects; + p->refs_numobjects += 1; } -err: - p->error = WFERR_UNMARSHALLABLE; - return 1; + Py_INCREF(v); + if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) { + Py_DECREF(v); + p->error = WFERR_UNMARSHALLABLE; + return 1; + } + *flag |= FLAG_REF; + return 0; } static void @@ -1645,8 +1677,8 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) return result; } -PyObject * -PyMarshal_WriteObjectToString(PyObject *x, int version) +static PyObject * +write_object_to_string(PyObject *x, int version, int stable) { WFILE wf; @@ -1665,7 +1697,17 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) Py_DECREF(wf.str); return NULL; } + if (stable) { + /* Make 
a first pass tracking which objects have multiple refs. */ + wf.refs_numobjects = -1; + w_object(x, &wf); + /* Make a second pass de-duplicating the multiple refs. */ + wf.refs_numobjects = 0; + wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str); + wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str); + } w_object(x, &wf); + w_clear_refs(&wf); if (wf.str != NULL) { const char *base = PyBytes_AS_STRING(wf.str); @@ -1685,6 +1727,12 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) return wf.str; } +PyObject * +PyMarshal_WriteObjectToString(PyObject *x, int version) +{ + return write_object_to_string(x, version, 0); +} + /* And an interface for Python programs... */ /*[clinic input] marshal.dump @@ -1789,6 +1837,8 @@ marshal.dumps version: int(c_default="Py_MARSHAL_VERSION") = version Indicates the data format that dumps should use. / + * + stable: bool = True Return the bytes object that would be written to a file by dump(value, file). @@ -1797,10 +1847,11 @@ unsupported type. [clinic start generated code]*/ static PyObject * -marshal_dumps_impl(PyObject *module, PyObject *value, int version) -/*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/ +marshal_dumps_impl(PyObject *module, PyObject *value, int version, + int stable) +/*[clinic end generated code: output=87276039e6c75faf input=b460e8ba0aa325ac]*/ { - return PyMarshal_WriteObjectToString(value, version); + return write_object_to_string(value, version, stable); } /*[clinic input]