Skip to content

Commit a8b9350

Browse files
authored
bpo-45340: Don't create object dictionaries unless actually needed (GH-28802)
* Never change types' cached keys; doing so could invalidate inline attribute objects.
* Lazily create object dictionaries.
* Update specialization of LOAD/STORE_ATTR.
* Don't update shared keys version for deletion of value.
* Update gdb support to handle instance values.
* Rename SPLIT_KEYS opcodes to INSTANCE_VALUE.
1 parent 97308df commit a8b9350

18 files changed

+721
-400
lines changed

Include/cpython/object.h

+1
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ struct _typeobject {
270270

271271
destructor tp_finalize;
272272
vectorcallfunc tp_vectorcall;
273+
Py_ssize_t tp_inline_values_offset;
273274
};
274275

275276
/* The *real* layout of a type object when allocated on the heap */

Include/internal/pycore_dict.h

+2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ extern uint64_t _pydict_global_version;
101101

102102
#define DICT_NEXT_VERSION() (++_pydict_global_version)
103103

104+
PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
105+
104106
#ifdef __cplusplus
105107
}
106108
#endif

Include/internal/pycore_object.h

+10
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,16 @@ extern int _Py_CheckSlotResult(
181181
extern PyObject* _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems);
182182

183183
extern int _PyObject_InitializeDict(PyObject *obj);
184+
extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
185+
PyObject *name, PyObject *value);
186+
PyObject * _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values,
187+
PyObject *name);
188+
PyDictValues ** _PyObject_ValuesPointer(PyObject *);
189+
PyObject ** _PyObject_DictPointer(PyObject *);
190+
int _PyObject_VisitInstanceAttributes(PyObject *self, visitproc visit, void *arg);
191+
void _PyObject_ClearInstanceAttributes(PyObject *self);
192+
void _PyObject_FreeInstanceAttributes(PyObject *self);
193+
int _PyObject_IsInstanceDictEmpty(PyObject *);
184194

185195
#ifdef __cplusplus
186196
}

Include/object.h

+1
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ given type object has a specified feature.
333333
*/
334334

335335
#ifndef Py_LIMITED_API
336+
336337
/* Set if instances of the type object are treated as sequences for pattern matching */
337338
#define Py_TPFLAGS_SEQUENCE (1 << 5)
338339
/* Set if instances of the type object are treated as mappings for pattern matching */

Include/opcode.h

+11-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/opcode.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def jabs_op(name, op):
231231
"BINARY_SUBSCR_DICT",
232232
"JUMP_ABSOLUTE_QUICK",
233233
"LOAD_ATTR_ADAPTIVE",
234-
"LOAD_ATTR_SPLIT_KEYS",
234+
"LOAD_ATTR_INSTANCE_VALUE",
235235
"LOAD_ATTR_WITH_HINT",
236236
"LOAD_ATTR_SLOT",
237237
"LOAD_ATTR_MODULE",
@@ -242,8 +242,9 @@ def jabs_op(name, op):
242242
"LOAD_METHOD_CACHED",
243243
"LOAD_METHOD_CLASS",
244244
"LOAD_METHOD_MODULE",
245+
"LOAD_METHOD_NO_DICT",
245246
"STORE_ATTR_ADAPTIVE",
246-
"STORE_ATTR_SPLIT_KEYS",
247+
"STORE_ATTR_INSTANCE_VALUE",
247248
"STORE_ATTR_SLOT",
248249
"STORE_ATTR_WITH_HINT",
249250
# Super instructions

Lib/test/test_descr.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -5500,17 +5500,19 @@ class A:
55005500
class B(A):
55015501
pass
55025502

5503+
# Shrink keys by repeatedly creating instances
5504+
[(A(), B()) for _ in range(20)]
5505+
55035506
a, b = A(), B()
55045507
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
55055508
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({"a":1}))
5506-
# Initial hash table can contain at most 5 elements.
5509+
# Initial hash table can contain only one or two elements.
55075510
# Set 6 attributes to cause internal resizing.
55085511
a.x, a.y, a.z, a.w, a.v, a.u = range(6)
55095512
self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
55105513
a2 = A()
5511-
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
5512-
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({"a":1}))
5513-
b.u, b.v, b.w, b.t, b.s, b.r = range(6)
5514+
self.assertGreater(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
5515+
self.assertLess(sys.getsizeof(vars(a2)), sys.getsizeof({"a":1}))
55145516
self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({"a":1}))
55155517

55165518

Lib/test/test_dict.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -994,8 +994,8 @@ class C:
994994

995995
@support.cpython_only
996996
def test_splittable_setdefault(self):
997-
"""split table must be combined when setdefault()
998-
breaks insertion order"""
997+
"""split table must keep correct insertion
998+
order when attributes are added using setdefault()"""
999999
a, b = self.make_shared_key_dict(2)
10001000

10011001
a['a'] = 1
@@ -1005,7 +1005,6 @@ def test_splittable_setdefault(self):
10051005
size_b = sys.getsizeof(b)
10061006
b['a'] = 1
10071007

1008-
self.assertGreater(size_b, size_a)
10091008
self.assertEqual(list(a), ['x', 'y', 'z', 'a', 'b'])
10101009
self.assertEqual(list(b), ['x', 'y', 'z', 'b', 'a'])
10111010

Lib/test/test_gc.py

+14-15
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def __getattr__(self, someattribute):
444444
# 0, thus mutating the trash graph as a side effect of merely asking
445445
# whether __del__ exists. This used to (before 2.3b1) crash Python.
446446
# Now __getattr__ isn't called.
447-
self.assertEqual(gc.collect(), 4)
447+
self.assertEqual(gc.collect(), 2)
448448
self.assertEqual(len(gc.garbage), garbagelen)
449449

450450
def test_boom2(self):
@@ -471,7 +471,7 @@ def __getattr__(self, someattribute):
471471
# there isn't a second time, so this simply cleans up the trash cycle.
472472
# We expect a and b (2 objects) to get reclaimed this way; a.__dict__ and
472472
# b.__dict__ are no longer counted because instance dicts are created lazily.
474-
self.assertEqual(gc.collect(), 4)
474+
self.assertEqual(gc.collect(), 2)
475475
self.assertEqual(len(gc.garbage), garbagelen)
476476

477477
def test_boom_new(self):
@@ -491,7 +491,7 @@ def __getattr__(self, someattribute):
491491
gc.collect()
492492
garbagelen = len(gc.garbage)
493493
del a, b
494-
self.assertEqual(gc.collect(), 4)
494+
self.assertEqual(gc.collect(), 2)
495495
self.assertEqual(len(gc.garbage), garbagelen)
496496

497497
def test_boom2_new(self):
@@ -513,7 +513,7 @@ def __getattr__(self, someattribute):
513513
gc.collect()
514514
garbagelen = len(gc.garbage)
515515
del a, b
516-
self.assertEqual(gc.collect(), 4)
516+
self.assertEqual(gc.collect(), 2)
517517
self.assertEqual(len(gc.garbage), garbagelen)
518518

519519
def test_get_referents(self):
@@ -943,8 +943,8 @@ def getstats():
943943
A()
944944
t = gc.collect()
945945
c, nc = getstats()
946-
self.assertEqual(t, 2*N) # instance object & its dict
947-
self.assertEqual(c - oldc, 2*N)
946+
self.assertEqual(t, N) # instance objects
947+
self.assertEqual(c - oldc, N)
948948
self.assertEqual(nc - oldnc, 0)
949949

950950
# But Z() is not actually collected.
@@ -964,8 +964,8 @@ def getstats():
964964
Z()
965965
t = gc.collect()
966966
c, nc = getstats()
967-
self.assertEqual(t, 2*N)
968-
self.assertEqual(c - oldc, 2*N)
967+
self.assertEqual(t, N)
968+
self.assertEqual(c - oldc, N)
969969
self.assertEqual(nc - oldnc, 0)
970970

971971
# The A() trash should have been reclaimed already but the
@@ -974,8 +974,8 @@ def getstats():
974974
zs.clear()
975975
t = gc.collect()
976976
c, nc = getstats()
977-
self.assertEqual(t, 4)
978-
self.assertEqual(c - oldc, 4)
977+
self.assertEqual(t, 2)
978+
self.assertEqual(c - oldc, 2)
979979
self.assertEqual(nc - oldnc, 0)
980980

981981
gc.enable()
@@ -1128,8 +1128,7 @@ def test_collect_generation(self):
11281128
@cpython_only
11291129
def test_collect_garbage(self):
11301130
self.preclean()
1131-
# Each of these cause four objects to be garbage: Two
1132-
# Uncollectables and their instance dicts.
1131+
# Each of these causes two objects to be garbage: two Uncollectables.
11331132
Uncollectable()
11341133
Uncollectable()
11351134
C1055820(666)
@@ -1138,8 +1137,8 @@ def test_collect_garbage(self):
11381137
if v[1] != "stop":
11391138
continue
11401139
info = v[2]
1141-
self.assertEqual(info["collected"], 2)
1142-
self.assertEqual(info["uncollectable"], 8)
1140+
self.assertEqual(info["collected"], 1)
1141+
self.assertEqual(info["uncollectable"], 4)
11431142

11441143
# We should now have the Uncollectables in gc.garbage
11451144
self.assertEqual(len(gc.garbage), 4)
@@ -1156,7 +1155,7 @@ def test_collect_garbage(self):
11561155
continue
11571156
info = v[2]
11581157
self.assertEqual(info["collected"], 0)
1159-
self.assertEqual(info["uncollectable"], 4)
1158+
self.assertEqual(info["uncollectable"], 2)
11601159

11611160
# Uncollectables should be gone
11621161
self.assertEqual(len(gc.garbage), 0)

Lib/test/test_sys.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1409,7 +1409,7 @@ def delx(self): del self.__x
14091409
check((1,2,3), vsize('') + 3*self.P)
14101410
# type
14111411
# static type: PyTypeObject
1412-
fmt = 'P2nPI13Pl4Pn9Pn11PIPP'
1412+
fmt = 'P2nPI13Pl4Pn9Pn12PIPP'
14131413
s = vsize(fmt)
14141414
check(int, s)
14151415
# class
@@ -1422,15 +1422,15 @@ def delx(self): del self.__x
14221422
'5P')
14231423
class newstyleclass(object): pass
14241424
# Separate block for PyDictKeysObject with 32 keys and 21 entries
1425-
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 8 + 5*calcsize("n2P"))
1425+
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
14261426
# dict with shared keys
1427-
check(newstyleclass().__dict__, size('nQ2P') + 5*self.P)
1427+
check(newstyleclass().__dict__, size('nQ2P') + 15*self.P)
14281428
o = newstyleclass()
14291429
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
14301430
# Separate block for PyDictKeysObject with 32 keys and 21 entries
1431-
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 16 + 10*calcsize("n2P"))
1431+
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 32 + 21*calcsize("n2P"))
14321432
# dict with shared keys
1433-
check(newstyleclass().__dict__, size('nQ2P') + 10*self.P)
1433+
check(newstyleclass().__dict__, size('nQ2P') + 13*self.P)
14341434
# unicode
14351435
# each tuple contains a string and its expected character size
14361436
# don't put any static strings here, as they may contain
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Object attributes are held in an array instead of a dictionary. An object's
2+
dictionary is created lazily, only when needed. This reduces the memory
3+
consumption of a typical Python object by about 30%. Patch by Mark Shannon.

0 commit comments

Comments
 (0)