Skip to content

gh-78274: Produce consistent output from marshal.dumps(). #28379

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Make marshal output the same whether or not it's a debug build. The fix has
a side-effect of making un-marshaling a little faster.
34 changes: 25 additions & 9 deletions Python/clinic/marshal.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

103 changes: 77 additions & 26 deletions Python/marshal.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ typedef struct {
const char *end;
char *buf;
_Py_hashtable_t *hashtable;
Py_ssize_t refs_numobjects;
int version;
} WFILE;

Expand Down Expand Up @@ -305,32 +306,63 @@ w_ref(PyObject *v, char *flag, WFILE *p)

entry = _Py_hashtable_get_entry(p->hashtable, v);
if (entry != NULL) {
/* write the reference index to the stream */
w = (int)(uintptr_t)entry->value;
/* we don't store "long" indices in the dict */
assert(0 <= w && w <= 0x7fffffff);
w_byte(TYPE_REF, p);
w_long(w, p);
return 1;
} else {
size_t s = p->hashtable->nentries;
if (p->refs_numobjects < 0) {
/* It's an extra ref on the first pass. */
w -= 1;
entry->value = (void *)(uintptr_t)w;
w_byte(TYPE_REF, p);
w_long(-1, p);
return 1;
}
else if (w >= 0) {
/* It's an extra ref on the second (or only) pass. */
/* write the reference index to the stream */
/* we don't store "long" indices in the dict */
assert(w <= 0x7fffffff);
w_byte(TYPE_REF, p);
w_long(w, p);
return 1;
}
else if (w == -1) {
/* It's the first ref on the second pass, with no extra refs.
So we treat it as though it has a refcount of 1. */
return 0;
}
else {
/* It's the first ref on the second pass, with extra refs, */
assert(w < 0);
entry->value = (void *)(uintptr_t)p->refs_numobjects;
p->refs_numobjects += 1;
*flag |= FLAG_REF;
return 0;
}
}

/* It's the object's first ref. */
if (p->refs_numobjects < 0) {
/* It's the first pass. */
w = -1;
}
else {
/* It's the second (or only) pass. */
/* we don't support long indices */
if (s >= 0x7fffffff) {
if (p->refs_numobjects >= 0x7fffffff) {
PyErr_SetString(PyExc_ValueError, "too many objects");
goto err;
}
w = (int)s;
Py_INCREF(v);
if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
Py_DECREF(v);
goto err;
p->error = WFERR_UNMARSHALLABLE;
return 1;
}
*flag |= FLAG_REF;
return 0;
w = (int)p->refs_numobjects;
p->refs_numobjects += 1;
}
err:
p->error = WFERR_UNMARSHALLABLE;
return 1;
Py_INCREF(v);
if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
Py_DECREF(v);
p->error = WFERR_UNMARSHALLABLE;
return 1;
}
*flag |= FLAG_REF;
return 0;
}

static void
Expand Down Expand Up @@ -1645,8 +1677,8 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
return result;
}

PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
static PyObject *
write_object_to_string(PyObject *x, int version, int stable)
{
WFILE wf;

Expand All @@ -1665,7 +1697,17 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
Py_DECREF(wf.str);
return NULL;
}
if (stable) {
/* Make a first pass tracking which objects have multiple refs. */
wf.refs_numobjects = -1;
w_object(x, &wf);
/* Make a second pass de-duplicating the multiple refs. */
wf.refs_numobjects = 0;
wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
}
w_object(x, &wf);

w_clear_refs(&wf);
if (wf.str != NULL) {
const char *base = PyBytes_AS_STRING(wf.str);
Expand All @@ -1685,6 +1727,12 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
return wf.str;
}

PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
return write_object_to_string(x, version, 0);
}

/* And an interface for Python programs... */
/*[clinic input]
marshal.dump
Expand Down Expand Up @@ -1789,6 +1837,8 @@ marshal.dumps
version: int(c_default="Py_MARSHAL_VERSION") = version
Indicates the data format that dumps should use.
/
*
stable: bool = True
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably make sense to default to False for now, so users would effectively opt-in to the performance penalty. (The same goes for marshal.dump().)


Return the bytes object that would be written to a file by dump(value, file).

Expand All @@ -1797,10 +1847,11 @@ unsupported type.
[clinic start generated code]*/

static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version)
/*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
int stable)
/*[clinic end generated code: output=87276039e6c75faf input=b460e8ba0aa325ac]*/
{
return PyMarshal_WriteObjectToString(value, version);
return write_object_to_string(value, version, stable);
}

/*[clinic input]
Expand Down