Skip to content

Commit 9d7e8f1

Browse files
committed
Make *stable* optional
1 parent e170116 commit 9d7e8f1

File tree

4 files changed

+113
-47
lines changed

4 files changed

+113
-47
lines changed

Doc/library/marshal.rst

+16-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ bytes-like objects.
5454
The module defines these functions:
5555

5656

57-
.. function:: dump(value, file[, version])
57+
.. function:: dump(value, file[, version [, stable]])
5858

5959
Write the value on the open file. The value must be a supported type. The
6060
file must be a writeable :term:`binary file`.
@@ -66,6 +66,13 @@ The module defines these functions:
6666
The *version* argument indicates the data format that ``dump`` should use
6767
(see below).
6868

69+
The *stable* argument makes the generated data as stable as possible.
70+
It guarantees ``dumps(value1, 4, True) == dumps(value2, 4, True)``
71+
for ``value1 is value2``, but not for ``value1 == value2``.
72+
73+
.. versionchanged:: 3.8
74+
The *stable* parameter was added.
75+
6976

7077
.. function:: load(file)
7178

@@ -80,7 +87,7 @@ The module defines these functions:
8087
:func:`load` will substitute ``None`` for the unmarshallable type.
8188

8289

83-
.. function:: dumps(value[, version])
90+
.. function:: dumps(value[, version [, stable]])
8491

8592
Return the bytes object that would be written to a file by ``dump(value, file)``. The
8693
value must be a supported type. Raise a :exc:`ValueError` exception if value
@@ -89,6 +96,13 @@ The module defines these functions:
8996
The *version* argument indicates the data format that ``dumps`` should use
9097
(see below).
9198

99+
The *stable* argument makes the generated data as stable as possible.
100+
It guarantees ``dumps(value1, 4, True) == dumps(value2, 4, True)``
101+
for ``value1 is value2``, but not for ``value1 == value2``.
102+
103+
.. versionchanged:: 3.8
104+
The *stable* parameter was added.
105+
92106

93107
.. function:: loads(bytes)
94108

Lib/test/test_marshal.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -331,11 +331,11 @@ def test_stable_refs(self):
331331
z = [y, y]
332332
dummy = x # refcnt of x must be >1
333333

334-
data = marshal.dumps(x)
335334
# x is used once, FLAG_REF must not be set.
335+
data = marshal.dumps(x, 4, True)
336336
self.assertEqual(b"i\x42\x00\x00\x00", data)
337337

338-
data = marshal.dumps(z)
338+
data = marshal.dumps(z, 4, True)
339339
# y is used twice, but x is used once because y is reused.
340340
self.assertEqual(b"[\x02\x00\x00\x00" + # list(size=2)i\x42\x00\x00\x00", data)
341341
b"\xa9\x01" + # small tuple(size=1) | FLAG_REF

Python/clinic/marshal.c.h

+18-11
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/marshal.c

+77-32
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ typedef struct {
8888
_Py_hashtable_t *hashtable;
8989
int last_index;
9090
int version;
91+
int stable;
9192
} WFILE;
9293

9394
#define w_byte(c, p) do { \
@@ -282,29 +283,61 @@ w_ref(PyObject *v, char *flag, WFILE *p)
282283
}
283284

284285
entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
285-
if (entry == NULL) {
286-
return 0;
287-
}
288286

289-
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
290-
// w >= 0: index written by previous w_ref()
291-
// w < 0 : refcnt counted by w_count_refs()
292-
if (w == -1) {
293-
// This object is used only once.
294-
return 0;
295-
}
287+
if (p->stable) {
288+
if (entry == NULL) {
289+
return 0;
290+
}
296291

297-
if (w >= 0) {
298-
/* we don't store "long" indices in the dict */
299-
assert(0 <= w && w <= 0x7fffffff);
300-
w_byte(TYPE_REF, p);
301-
w_long(w, p);
292+
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
293+
// w >= 0: index written by previous w_ref()
294+
// w < 0 : refcnt counted by w_count_refs()
295+
if (w == -1) {
296+
// This object is used only once.
297+
return 0;
298+
}
299+
300+
if (w >= 0) {
301+
/* we don't store "long" indices in the dict */
302+
assert(0 <= w && w <= 0x7fffffff);
303+
w_byte(TYPE_REF, p);
304+
w_long(w, p);
305+
return 1;
306+
} else {
307+
w = p->last_index++;
308+
_Py_HASHTABLE_ENTRY_WRITE_DATA(p->hashtable, entry, w);
309+
*flag |= FLAG_REF;
310+
return 0;
311+
}
312+
}
313+
else {
314+
if (entry != NULL) {
315+
/* write the reference index to the stream */
316+
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
317+
/* we don't store "long" indices in the dict */
318+
assert(0 <= w && w <= 0x7fffffff);
319+
w_byte(TYPE_REF, p);
320+
w_long(w, p);
321+
return 1;
322+
} else {
323+
size_t s = p->hashtable->entries;
324+
/* we don't support long indices */
325+
if (s >= 0x7fffffff) {
326+
PyErr_SetString(PyExc_ValueError, "too many objects");
327+
goto err;
328+
}
329+
w = (int)s;
330+
Py_INCREF(v);
331+
if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
332+
Py_DECREF(v);
333+
goto err;
334+
}
335+
*flag |= FLAG_REF;
336+
return 0;
337+
}
338+
err:
339+
p->error = WFERR_UNMARSHALLABLE;
302340
return 1;
303-
} else {
304-
w = p->last_index++;
305-
_Py_HASHTABLE_ENTRY_WRITE_DATA(p->hashtable, entry, w);
306-
*flag |= FLAG_REF;
307-
return 0;
308341
}
309342
}
310343

@@ -711,7 +744,10 @@ w_init_refs(WFILE *wf, int version, PyObject *x)
711744
}
712745
wf->last_index = 0;
713746

714-
return w_count_refs(x, wf);
747+
if (wf->stable) {
748+
return w_count_refs(x, wf);
749+
}
750+
return 0;
715751
}
716752

717753
static int
@@ -1725,19 +1761,17 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
17251761
return result;
17261762
}
17271763

1728-
PyObject *
1729-
PyMarshal_WriteObjectToString(PyObject *x, int version)
1764+
static PyObject *
1765+
marshal_to_string(PyObject *x, int version, int stable)
17301766
{
1731-
WFILE wf;
1767+
WFILE wf = {.stable=stable, .version=version};
17321768

1733-
memset(&wf, 0, sizeof(wf));
17341769
wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
17351770
if (wf.str == NULL)
17361771
return NULL;
17371772
wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
17381773
wf.end = wf.ptr + PyBytes_Size(wf.str);
17391774
wf.error = WFERR_OK;
1740-
wf.version = version;
17411775
if (w_init_refs(&wf, version, x)) {
17421776
Py_DECREF(wf.str);
17431777
return NULL;
@@ -1768,6 +1802,12 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
17681802
return wf.str;
17691803
}
17701804

1805+
PyObject *
1806+
PyMarshal_WriteObjectToString(PyObject *x, int version)
1807+
{
1808+
return marshal_to_string(x, version, 0);
1809+
}
1810+
17711811
/* And an interface for Python programs... */
17721812
/*[clinic input]
17731813
marshal.dump
@@ -1778,6 +1818,8 @@ marshal.dump
17781818
Must be a writeable binary file.
17791819
version: int(c_default="Py_MARSHAL_VERSION") = version
17801820
Indicates the data format that dump should use.
1821+
stable: bool = False
1822+
Generate stable output as possible.
17811823
/
17821824
17831825
Write the value on the open file.
@@ -1789,15 +1831,15 @@ to the file. The object will not be properly read back by load().
17891831

17901832
static PyObject *
17911833
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1792-
int version)
1793-
/*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1834+
int version, int stable)
1835+
/*[clinic end generated code: output=b472bdb1b466baa1 input=89780da6b9530e4b]*/
17941836
{
17951837
/* XXX Quick hack -- need to do this differently */
17961838
PyObject *s;
17971839
PyObject *res;
17981840
_Py_IDENTIFIER(write);
17991841

1800-
s = PyMarshal_WriteObjectToString(value, version);
1842+
s = marshal_to_string(value, version, stable);
18011843
if (s == NULL)
18021844
return NULL;
18031845
res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
@@ -1871,6 +1913,8 @@ marshal.dumps
18711913
Must be a supported type.
18721914
version: int(c_default="Py_MARSHAL_VERSION") = version
18731915
Indicates the data format that dumps should use.
1916+
stable: bool = False
1917+
Generate stable output as possible.
18741918
/
18751919
18761920
Return the bytes object that would be written to a file by dump(value, file).
@@ -1880,10 +1924,11 @@ unsupported type.
18801924
[clinic start generated code]*/
18811925

18821926
static PyObject *
1883-
marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1884-
/*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1927+
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
1928+
int stable)
1929+
/*[clinic end generated code: output=87276039e6c75faf input=afce1546a470f153]*/
18851930
{
1886-
return PyMarshal_WriteObjectToString(value, version);
1931+
return marshal_to_string(value, version, stable);
18871932
}
18881933

18891934
/*[clinic input]

0 commit comments

Comments
 (0)