Commit 899fdb2

Revert "GH-126491: GC: Mark objects reachable from roots before doing cycle collection (GH-126502)" (#126983)

hugovk authored Nov 19, 2024
1 parent 84f07c3 commit 899fdb2
Showing 21 changed files with 330 additions and 332 deletions.
2 changes: 0 additions & 2 deletions Include/cpython/pystats.h
@@ -99,8 +99,6 @@ typedef struct _gc_stats {
     uint64_t collections;
     uint64_t object_visits;
     uint64_t objects_collected;
-    uint64_t objects_transitively_reachable;
-    uint64_t objects_not_transitively_reachable;
 } GCStats;
 
 typedef struct _uop_stats {
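The `GCStats` counters above feed the `--enable-pystats` build's statistics output. For a rough Python-level view of the same kind of data — using only the always-available `gc` API, not the pystats machinery — a sketch:

```python
import gc

# gc.get_stats() returns one dict per generation with cumulative
# counts of collections run, objects collected, and uncollectable
# objects found.
for gen, stats in enumerate(gc.get_stats()):
    print(f"gen {gen}: collections={stats['collections']} "
          f"collected={stats['collected']} "
          f"uncollectable={stats['uncollectable']}")
```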
2 changes: 2 additions & 0 deletions Include/internal/pycore_dict.h
@@ -43,6 +43,8 @@ extern int _PyDict_Next(
 
 extern int _PyDict_HasOnlyStringKeys(PyObject *mp);
 
+extern void _PyDict_MaybeUntrack(PyObject *mp);
+
 // Export for '_ctypes' shared extension
 PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);
 
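`_PyDict_MaybeUntrack()` is the internal helper whose declaration this commit restores; per the documentation change below, it lets the collector untrack dicts whose contents cannot form reference cycles. A sketch of the observable behavior from the Python side (assuming CPython at this commit; the helper itself is not callable from Python):

```python
import gc

d = {"a": 1}
assert not gc.is_tracked(d)   # only untracked contents: dict starts untracked

d["b"] = []                   # inserting a tracked value tracks the dict
assert gc.is_tracked(d)

del d["b"]
gc.collect()                  # a full collection may untrack it again
gc.collect()                  # (can take more than one pass)
assert not gc.is_tracked(d)
```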
3 changes: 0 additions & 3 deletions Include/internal/pycore_frame.h
@@ -75,7 +75,6 @@ typedef struct _PyInterpreterFrame {
     _PyStackRef *stackpointer;
     uint16_t return_offset;  /* Only relevant during a function call */
     char owner;
-    char visited;
     /* Locals and stack */
     _PyStackRef localsplus[1];
 } _PyInterpreterFrame;
@@ -208,7 +207,6 @@ _PyFrame_Initialize(
 #endif
     frame->return_offset = 0;
     frame->owner = FRAME_OWNED_BY_THREAD;
-    frame->visited = 0;
 
     for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
         frame->localsplus[i] = PyStackRef_NULL;
@@ -391,7 +389,6 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
     frame->instr_ptr = _PyCode_CODE(code);
 #endif
     frame->owner = FRAME_OWNED_BY_THREAD;
-    frame->visited = 0;
     frame->return_offset = 0;
 
 #ifdef Py_GIL_DISABLED
11 changes: 2 additions & 9 deletions Include/internal/pycore_gc.h
@@ -10,11 +10,11 @@ extern "C" {
 
 /* GC information is stored BEFORE the object structure. */
 typedef struct {
-    // Tagged pointer to next object in the list.
+    // Pointer to next object in the list.
     // 0 means the object is not tracked
     uintptr_t _gc_next;
 
-    // Tagged pointer to previous object in the list.
+    // Pointer to previous object in the list.
     // Lowest two bits are used for flags documented later.
     uintptr_t _gc_prev;
 } PyGC_Head;
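The "lowest two bits are used for flags" scheme works because a `PyGC_Head` is always at least 4-byte aligned, so those bits of a real pointer are zero and can be borrowed. A minimal Python sketch of the mask arithmetic (the flag names are illustrative, not CPython's actual macros):

```python
FLAG_FINALIZED  = 0x1   # illustrative flag names
FLAG_COLLECTING = 0x2
POINTER_MASK    = ~0x3  # clears the two flag bits

def pack(prev_addr, flags):
    # The pointer's low bits must be free before flags can be stored there.
    assert prev_addr & 0x3 == 0, "gc heads are at least 4-byte aligned"
    return prev_addr | flags

def unpack(gc_prev):
    return gc_prev & POINTER_MASK, gc_prev & 0x3

packed = pack(0x7F0000001000, FLAG_FINALIZED)
assert unpack(packed) == (0x7F0000001000, FLAG_FINALIZED)
```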
@@ -302,11 +302,6 @@ struct gc_generation_stats {
     Py_ssize_t uncollectable;
 };
 
-enum _GCPhase {
-    GC_PHASE_MARK = 0,
-    GC_PHASE_COLLECT = 1
-};
-
 struct _gc_runtime_state {
     /* List of objects that still need to be cleaned up, singly linked
      * via their gc headers' gc_prev pointers.  */
@@ -330,12 +325,10 @@
     /* a list of callbacks to be invoked when collection is performed */
     PyObject *callbacks;
 
-    Py_ssize_t prior_heap_size;
     Py_ssize_t heap_size;
     Py_ssize_t work_to_do;
     /* Which of the old spaces is the visited space */
     int visited_space;
-    int phase;
 
 #ifdef Py_GIL_DISABLED
     /* This is the number of objects that survived the last full
4 changes: 2 additions & 2 deletions Include/internal/pycore_object.h
@@ -466,8 +466,8 @@ static inline void _PyObject_GC_TRACK(
     PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
     _PyGCHead_SET_NEXT(last, gc);
     _PyGCHead_SET_PREV(gc, last);
-    uintptr_t not_visited = 1 ^ interp->gc.visited_space;
-    gc->_gc_next = ((uintptr_t)generation0) | not_visited;
+    /* Young objects will be moved into the visited space during GC, so set the bit here */
+    gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
     generation0->_gc_prev = (uintptr_t)gc;
 #endif
 }
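`_PyObject_GC_TRACK` splices the object in just before `generation0`, i.e. at the tail of a circular doubly linked list. The same pointer shuffle, restated as a toy Python sketch (hypothetical `Node` type, for illustration only):

```python
class Node:
    def __init__(self):
        self.prev = self   # a lone node is a one-element circular list
        self.next = self

def track(generation0, node):
    # Mirrors the C code above: link node between the current tail
    # (generation0.prev) and the list head.
    last = generation0.prev
    last.next = node
    node.prev = last
    node.next = generation0
    generation0.prev = node
```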
1 change: 0 additions & 1 deletion Include/internal/pycore_runtime_init.h
@@ -134,7 +134,6 @@ extern PyTypeObject _PyExc_MemoryError;
             { .threshold = 0, }, \
         }, \
         .work_to_do = -5000, \
-        .phase = GC_PHASE_MARK, \
     }, \
     .qsbr = { \
         .wr_seq = QSBR_INITIAL, \
63 changes: 15 additions & 48 deletions InternalDocs/garbage_collector.md
@@ -351,7 +351,6 @@ follows these steps in order:
 the reference counts fall to 0, triggering the destruction of all unreachable
 objects.
 
-
 Optimization: incremental collection
 ====================================
 
@@ -485,46 +484,6 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
 ```
 
 
-Optimization: visiting reachable objects
-========================================
-
-An object cannot be garbage if it can be reached.
-
-To avoid having to identify reference cycles across the whole heap, we can
-reduce the amount of work done considerably by first moving most reachable objects
-to the `visited` space. Empirically, most reachable objects can be reached from a
-small set of global objects and local variables.
-This step does much less work per object, so reduces the time spent
-performing garbage collection by at least half.
-
-> [!NOTE]
-> Objects that are not determined to be reachable by this pass are not necessarily
-> unreachable. We still need to perform the main algorithm to determine which objects
-> are actually unreachable.
-
-We use the same technique of forming a transitive closure as the incremental
-collector does to find reachable objects, seeding the list with some global
-objects and the currently executing frames.
-
-This phase moves objects to the `visited` space, as follows:
-
-1. All objects directly referred to by any builtin class, the `sys` module, the
-   `builtins` module and all objects directly referred to from stack frames are
-   added to a working set of reachable objects.
-2. Until this working set is empty:
-   1. Pop an object from the set and move it to the `visited` space
-   2. For each object directly reachable from that object:
-      * If it is not already in `visited` space and it is a GC object,
-        add it to the working set
-
-Before each increment of collection is performed, the stacks are scanned
-to check for any new stack frames that have been created since the last
-increment. All objects directly referred to from those stack frames are
-added to the working set.
-Then the above algorithm is repeated, starting from step 2.
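The passage deleted above describes a plain worklist-based transitive closure. A rough Python equivalent using the public `gc` introspection API (a `set` of ids stands in for the visited space, and the seed roots are simplified):

```python
import builtins
import gc
import sys

def transitively_reachable(roots):
    visited = set()                      # stands in for the "visited" space
    work = [r for r in roots if gc.is_tracked(r)]
    while work:
        obj = work.pop()                 # step 2.1: pop and mark visited
        if id(obj) in visited:
            continue
        visited.add(id(obj))
        for ref in gc.get_referents(obj):   # step 2.2: scan direct references
            if gc.is_tracked(ref) and id(ref) not in visited:
                work.append(ref)
    return visited

reachable = transitively_reachable([sys.modules, builtins.__dict__])
print(f"{len(reachable)} objects reachable from the seed roots")
```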


 Optimization: reusing fields to save memory
 ===========================================
 
@@ -573,8 +532,8 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section:
 currently in. Instead, when that's needed, ad hoc tricks (like the
 `NEXT_MASK_UNREACHABLE` flag) are employed.
 
-Optimization: delayed untracking of containers
-==============================================
+Optimization: delay tracking containers
+=======================================
 
 Certain types of containers cannot participate in a reference cycle, and so do
 not need to be tracked by the garbage collector. Untracking these objects
@@ -589,8 +548,8 @@ a container:
 As a general rule, instances of atomic types aren't tracked and instances of
 non-atomic types (containers, user-defined objects...) are. However, some
 type-specific optimizations can be present in order to suppress the garbage
-collector footprint of simple instances. Historically, both dictionaries and
-tuples were untracked during garbage collection. Now it is only tuples:
+collector footprint of simple instances. Some examples of native types that
+benefit from delayed tracking:
 
 - Tuples containing only immutable objects (integers, strings etc,
   and recursively, tuples of immutable objects) do not need to be tracked. The
@@ -599,8 +558,14 @@ tuples were untracked during garbage collection. Now it is only tuples:
   tuples at creation time. Instead, all tuples except the empty tuple are tracked
   when created. During garbage collection it is determined whether any surviving
   tuples can be untracked. A tuple can be untracked if all of its contents are
-  already not tracked. Tuples are examined for untracking when moved from the
-  young to the old generation.
+  already not tracked. Tuples are examined for untracking in all garbage collection
+  cycles. It may take more than one cycle to untrack a tuple.
+
+- Dictionaries containing only immutable objects also do not need to be tracked.
+  Dictionaries are untracked when created. If a tracked item is inserted into a
+  dictionary (either as a key or value), the dictionary becomes tracked. During a
+  full garbage collection (all generations), the collector will untrack any dictionaries
+  whose contents are not tracked.
 
 The garbage collector module provides the Python function `is_tracked(obj)`, which returns
 the current tracking status of the object. Subsequent garbage collections may change the
@@ -613,9 +578,11 @@ tracking status of the object.
 False
 >>> gc.is_tracked([])
 True
->>> gc.is_tracked(("a", 1))
+>>> gc.is_tracked({})
 False
+>>> gc.is_tracked({"a": 1})
+False
+>>> gc.is_tracked({"a": []})
+True
 ```
 
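The tuple rule in the restored text can be observed the same way. A doctest-style sketch (results assume CPython at this commit, and a tuple built at run time, since literals may be handled as compile-time constants):

```pycon
>>> import gc
>>> t = tuple([1, 2])   # built at run time, so it starts out tracked
>>> gc.is_tracked(t)
True
>>> _ = gc.collect(); _ = gc.collect()   # GC examines surviving tuples
>>> gc.is_tracked(t)    # all contents untracked, so the tuple is untracked
False
>>> gc.is_tracked(tuple([1, []]))   # a tracked element keeps it tracked
True
```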
109 changes: 109 additions & 0 deletions Lib/test/test_dict.py
@@ -880,6 +880,115 @@ class C(object):
         gc.collect()
         self.assertIs(ref(), None, "Cycle was not collected")
 
+    def _not_tracked(self, t):
+        # Nested containers can take several collections to untrack
+        gc.collect()
+        gc.collect()
+        self.assertFalse(gc.is_tracked(t), t)
+
+    def _tracked(self, t):
+        self.assertTrue(gc.is_tracked(t), t)
+        gc.collect()
+        gc.collect()
+        self.assertTrue(gc.is_tracked(t), t)
+
+    def test_string_keys_can_track_values(self):
+        # Test that this doesn't leak.
+        for i in range(10):
+            d = {}
+            for j in range(10):
+                d[str(j)] = j
+            d["foo"] = d
+
+    @support.cpython_only
+    def test_track_literals(self):
+        # Test GC-optimization of dict literals
+        x, y, z, w = 1.5, "a", (1, None), []
+
+        self._not_tracked({})
+        self._not_tracked({x:(), y:x, z:1})
+        self._not_tracked({1: "a", "b": 2})
+        self._not_tracked({1: 2, (None, True, False, ()): int})
+        self._not_tracked({1: object()})
+
+        # Dicts with mutable elements are always tracked, even if those
+        # elements are not tracked right now.
+        self._tracked({1: []})
+        self._tracked({1: ([],)})
+        self._tracked({1: {}})
+        self._tracked({1: set()})
+
+    @support.cpython_only
+    def test_track_dynamic(self):
+        # Test GC-optimization of dynamically-created dicts
+        class MyObject(object):
+            pass
+        x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
+
+        d = dict()
+        self._not_tracked(d)
+        d[1] = "a"
+        self._not_tracked(d)
+        d[y] = 2
+        self._not_tracked(d)
+        d[z] = 3
+        self._not_tracked(d)
+        self._not_tracked(d.copy())
+        d[4] = w
+        self._tracked(d)
+        self._tracked(d.copy())
+        d[4] = None
+        self._not_tracked(d)
+        self._not_tracked(d.copy())
+
+        # dd isn't tracked right now, but it may mutate and therefore d
+        # which contains it must be tracked.
+        d = dict()
+        dd = dict()
+        d[1] = dd
+        self._not_tracked(dd)
+        self._tracked(d)
+        dd[1] = d
+        self._tracked(dd)
+
+        d = dict.fromkeys([x, y, z])
+        self._not_tracked(d)
+        dd = dict()
+        dd.update(d)
+        self._not_tracked(dd)
+        d = dict.fromkeys([x, y, z, o])
+        self._tracked(d)
+        dd = dict()
+        dd.update(d)
+        self._tracked(dd)
+
+        d = dict(x=x, y=y, z=z)
+        self._not_tracked(d)
+        d = dict(x=x, y=y, z=z, w=w)
+        self._tracked(d)
+        d = dict()
+        d.update(x=x, y=y, z=z)
+        self._not_tracked(d)
+        d.update(w=w)
+        self._tracked(d)
+
+        d = dict([(x, y), (z, 1)])
+        self._not_tracked(d)
+        d = dict([(x, y), (z, w)])
+        self._tracked(d)
+        d = dict()
+        d.update([(x, y), (z, 1)])
+        self._not_tracked(d)
+        d.update([(x, y), (z, w)])
+        self._tracked(d)
+
+    @support.cpython_only
+    def test_track_subtypes(self):
+        # Dict subtypes are always tracked
+        class MyDict(dict):
+            pass
+        self._tracked(MyDict())
+
     def make_shared_key_dict(self, n):
         class C:
             pass