Skip to content

Commit 3daaafb

Browse files
bpo-32037: Use the INT opcode for 32-bit integers in protocol 0 pickles. (#4407)
1 parent 0a2abdf commit 3daaafb

File tree

5 files changed

+70
-63
lines changed

5 files changed

+70
-63
lines changed

Lib/pickle.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,10 @@ def save_long(self, obj):
674674
else:
675675
self.write(LONG4 + pack("<i", n) + encoded)
676676
return
677-
self.write(LONG + repr(obj).encode("ascii") + b'L\n')
677+
if -0x80000000 <= obj <= 0x7fffffff:
678+
self.write(INT + repr(obj).encode("ascii") + b'\n')
679+
else:
680+
self.write(LONG + repr(obj).encode("ascii") + b'L\n')
678681
dispatch[int] = save_long
679682

680683
def save_float(self, obj):

Lib/pickletools.py

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2480,35 +2480,35 @@ def __init__(self, value):
24802480
0: ( MARK
24812481
1: l LIST (MARK at 0)
24822482
2: p PUT 0
2483-
5: L LONG 1
2484-
9: a APPEND
2485-
10: L LONG 2
2486-
14: a APPEND
2487-
15: ( MARK
2488-
16: L LONG 3
2489-
20: L LONG 4
2490-
24: t TUPLE (MARK at 15)
2491-
25: p PUT 1
2492-
28: a APPEND
2493-
29: ( MARK
2494-
30: d DICT (MARK at 29)
2495-
31: p PUT 2
2496-
34: c GLOBAL '_codecs encode'
2497-
50: p PUT 3
2498-
53: ( MARK
2499-
54: V UNICODE 'abc'
2500-
59: p PUT 4
2501-
62: V UNICODE 'latin1'
2502-
70: p PUT 5
2503-
73: t TUPLE (MARK at 53)
2504-
74: p PUT 6
2505-
77: R REDUCE
2506-
78: p PUT 7
2507-
81: V UNICODE 'def'
2508-
86: p PUT 8
2509-
89: s SETITEM
2510-
90: a APPEND
2511-
91: . STOP
2483+
5: I INT 1
2484+
8: a APPEND
2485+
9: I INT 2
2486+
12: a APPEND
2487+
13: ( MARK
2488+
14: I INT 3
2489+
17: I INT 4
2490+
20: t TUPLE (MARK at 13)
2491+
21: p PUT 1
2492+
24: a APPEND
2493+
25: ( MARK
2494+
26: d DICT (MARK at 25)
2495+
27: p PUT 2
2496+
30: c GLOBAL '_codecs encode'
2497+
46: p PUT 3
2498+
49: ( MARK
2499+
50: V UNICODE 'abc'
2500+
55: p PUT 4
2501+
58: V UNICODE 'latin1'
2502+
66: p PUT 5
2503+
69: t TUPLE (MARK at 49)
2504+
70: p PUT 6
2505+
73: R REDUCE
2506+
74: p PUT 7
2507+
77: V UNICODE 'def'
2508+
82: p PUT 8
2509+
85: s SETITEM
2510+
86: a APPEND
2511+
87: . STOP
25122512
highest protocol among opcodes = 0
25132513
25142514
Try again with a "binary" pickle.
@@ -2577,13 +2577,13 @@ def __init__(self, value):
25772577
93: p PUT 6
25782578
96: V UNICODE 'value'
25792579
103: p PUT 7
2580-
106: L LONG 42
2581-
111: s SETITEM
2582-
112: b BUILD
2583-
113: a APPEND
2584-
114: g GET 5
2585-
117: a APPEND
2586-
118: . STOP
2580+
106: I INT 42
2581+
110: s SETITEM
2582+
111: b BUILD
2583+
112: a APPEND
2584+
113: g GET 5
2585+
116: a APPEND
2586+
117: . STOP
25872587
highest protocol among opcodes = 0
25882588
25892589
>>> dis(pickle.dumps(x, 1))

Lib/test/pickletester.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,7 +1821,7 @@ def test_simple_newobj(self):
18211821
with self.subTest(proto=proto):
18221822
s = self.dumps(x, proto)
18231823
if proto < 1:
1824-
self.assertIn(b'\nL64206', s) # LONG
1824+
self.assertIn(b'\nI64206', s) # INT
18251825
else:
18261826
self.assertIn(b'M\xce\xfa', s) # BININT2
18271827
self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
@@ -1837,7 +1837,7 @@ def test_complex_newobj(self):
18371837
with self.subTest(proto=proto):
18381838
s = self.dumps(x, proto)
18391839
if proto < 1:
1840-
self.assertIn(b'\nL64206', s) # LONG
1840+
self.assertIn(b'\nI64206', s) # INT
18411841
elif proto < 2:
18421842
self.assertIn(b'M\xce\xfa', s) # BININT2
18431843
elif proto < 4:
@@ -1857,7 +1857,7 @@ def test_complex_newobj_ex(self):
18571857
with self.subTest(proto=proto):
18581858
s = self.dumps(x, proto)
18591859
if proto < 1:
1860-
self.assertIn(b'\nL64206', s) # LONG
1860+
self.assertIn(b'\nI64206', s) # INT
18611861
elif proto < 2:
18621862
self.assertIn(b'M\xce\xfa', s) # BININT2
18631863
elif proto < 4:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Integers that fit in a signed 32-bit integer will now be pickled with
2+
protocol 0 using the INT opcode. This will decrease the size of a pickle,
3+
speed up pickling and unpickling, and cause these integers to be unpickled as
4+
int instances in Python 2.

Modules/_pickle.c

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,18 +1858,13 @@ save_long(PicklerObject *self, PyObject *obj)
18581858
PyObject *repr = NULL;
18591859
Py_ssize_t size;
18601860
long val;
1861+
int overflow;
18611862
int status = 0;
18621863

1863-
const char long_op = LONG;
1864-
1865-
val= PyLong_AsLong(obj);
1866-
if (val == -1 && PyErr_Occurred()) {
1867-
/* out of range for int pickling */
1868-
PyErr_Clear();
1869-
}
1870-
else if (self->bin &&
1871-
(sizeof(long) <= 4 ||
1872-
(val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
1864+
val= PyLong_AsLongAndOverflow(obj, &overflow);
1865+
if (!overflow && (sizeof(long) <= 4 ||
1866+
(val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
1867+
{
18731868
/* result fits in a signed 4-byte integer.
18741869
18751870
Note: we can't use -0x80000000L in the above condition because some
@@ -1882,31 +1877,35 @@ save_long(PicklerObject *self, PyObject *obj)
18821877
char pdata[32];
18831878
Py_ssize_t len = 0;
18841879

1885-
pdata[1] = (unsigned char)(val & 0xff);
1886-
pdata[2] = (unsigned char)((val >> 8) & 0xff);
1887-
pdata[3] = (unsigned char)((val >> 16) & 0xff);
1888-
pdata[4] = (unsigned char)((val >> 24) & 0xff);
1889-
1890-
if ((pdata[4] == 0) && (pdata[3] == 0)) {
1891-
if (pdata[2] == 0) {
1892-
pdata[0] = BININT1;
1893-
len = 2;
1880+
if (self->bin) {
1881+
pdata[1] = (unsigned char)(val & 0xff);
1882+
pdata[2] = (unsigned char)((val >> 8) & 0xff);
1883+
pdata[3] = (unsigned char)((val >> 16) & 0xff);
1884+
pdata[4] = (unsigned char)((val >> 24) & 0xff);
1885+
1886+
if ((pdata[4] != 0) || (pdata[3] != 0)) {
1887+
pdata[0] = BININT;
1888+
len = 5;
18941889
}
1895-
else {
1890+
else if (pdata[2] != 0) {
18961891
pdata[0] = BININT2;
18971892
len = 3;
18981893
}
1894+
else {
1895+
pdata[0] = BININT1;
1896+
len = 2;
1897+
}
18991898
}
19001899
else {
1901-
pdata[0] = BININT;
1902-
len = 5;
1900+
sprintf(pdata, "%c%ld\n", INT, val);
1901+
len = strlen(pdata);
19031902
}
1904-
19051903
if (_Pickler_Write(self, pdata, len) < 0)
19061904
return -1;
19071905

19081906
return 0;
19091907
}
1908+
assert(!PyErr_Occurred());
19101909

19111910
if (self->proto >= 2) {
19121911
/* Linear-time pickling. */
@@ -1986,6 +1985,7 @@ save_long(PicklerObject *self, PyObject *obj)
19861985
goto error;
19871986
}
19881987
else {
1988+
const char long_op = LONG;
19891989
const char *string;
19901990

19911991
/* proto < 2: write the repr and newline. This is quadratic-time (in

0 commit comments

Comments
 (0)