Skip to content

Commit

Permalink
pythonGH-126491: GC: Mark objects reachable from roots before doing c…
Browse files Browse the repository at this point in the history
…ycle collection (pythonGH-127110)

* Mark almost all reachable objects before doing collection phase

* Add stats for objects marked

* Visit new frames before each increment

* Update docs

* Clearer calculation of work to do.
  • Loading branch information
markshannon authored Dec 2, 2024
1 parent 2a373da commit a8dd821
Show file tree
Hide file tree
Showing 14 changed files with 355 additions and 103 deletions.
2 changes: 2 additions & 0 deletions Include/cpython/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ typedef struct _gc_stats {
uint64_t collections;
uint64_t object_visits;
uint64_t objects_collected;
uint64_t objects_transitively_reachable;
uint64_t objects_not_transitively_reachable;
} GCStats;

typedef struct _uop_stats {
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame {
_PyStackRef *stackpointer;
uint16_t return_offset; /* Only relevant during a function call */
char owner;
char visited;
/* Locals and stack */
_PyStackRef localsplus[1];
} _PyInterpreterFrame;
Expand Down Expand Up @@ -207,6 +208,7 @@ _PyFrame_Initialize(
#endif
frame->return_offset = 0;
frame->owner = FRAME_OWNED_BY_THREAD;
frame->visited = 0;

for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
frame->localsplus[i] = PyStackRef_NULL;
Expand Down Expand Up @@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
frame->instr_ptr = _PyCode_CODE(code);
#endif
frame->owner = FRAME_OWNED_BY_THREAD;
frame->visited = 0;
frame->return_offset = 0;

#ifdef Py_GIL_DISABLED
Expand Down
10 changes: 8 additions & 2 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ extern "C" {

/* GC information is stored BEFORE the object structure. */
typedef struct {
// Pointer to next object in the list.
// Tagged pointer to next object in the list.
// 0 means the object is not tracked
uintptr_t _gc_next;

// Pointer to previous object in the list.
// Tagged pointer to previous object in the list.
// Lowest two bits are used for flags documented later.
uintptr_t _gc_prev;
} PyGC_Head;
Expand Down Expand Up @@ -284,6 +284,11 @@ struct gc_generation_stats {
Py_ssize_t uncollectable;
};

/* Phases of the garbage collector, stored in the `phase` field of
 * `struct _gc_runtime_state`. */
enum _GCPhase {
/* Marking phase: objects reachable from the roots are moved to the
 * visited space before cycle collection. This is the value the runtime
 * state is initialized with. */
GC_PHASE_MARK = 0,
/* Collection phase: the cycle-collection work that follows marking.
 * NOTE(review): the transitions between phases are not visible here. */
GC_PHASE_COLLECT = 1
};

struct _gc_runtime_state {
/* List of objects that still need to be cleaned up, singly linked
* via their gc headers' gc_prev pointers. */
Expand Down Expand Up @@ -311,6 +316,7 @@ struct _gc_runtime_state {
Py_ssize_t work_to_do;
/* Which of the old spaces is the visited space */
int visited_space;
int phase;

#ifdef Py_GIL_DISABLED
/* This is the number of objects that survived the last full
Expand Down
4 changes: 2 additions & 2 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,8 @@ static inline void _PyObject_GC_TRACK(
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
/* Young objects will be moved into the visited space during GC, so set the bit here */
gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
generation0->_gc_prev = (uintptr_t)gc;
#endif
}
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ extern PyTypeObject _PyExc_MemoryError;
{ .threshold = 0, }, \
}, \
.work_to_do = -5000, \
.phase = GC_PHASE_MARK, \
}, \
.qsbr = { \
.wr_seq = QSBR_INITIAL, \
Expand Down
39 changes: 39 additions & 0 deletions InternalDocs/garbage_collector.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,45 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
```


Optimization: visiting reachable objects
========================================

An object cannot be garbage if it can be reached.

To avoid having to identify reference cycles across the whole heap, we can
reduce the amount of work done considerably by first moving most reachable objects
to the `visited` space. Empirically, most reachable objects can be reached from a
small set of global objects and local variables.
This step does much less work per object, so it reduces the time spent
performing garbage collection by at least half.

> [!NOTE]
> Objects that are not determined to be reachable by this pass are not necessarily
> unreachable. We still need to perform the main algorithm to determine which objects
> are actually unreachable.

We use the same technique of forming a transitive closure as the incremental
collector does to find reachable objects, seeding the list with some global
objects and the currently executing frames.

This phase moves objects to the `visited` space, as follows:

1. All objects directly referred to by any builtin class, the `sys` module, the `builtins`
   module and all objects directly referred to from stack frames are added to a working
   set of reachable objects.
2. Until this working set is empty:
   1. Pop an object from the set and move it to the `visited` space
   2. For each object directly reachable from that object:
      * If it is not already in `visited` space and it is a GC object,
        add it to the working set


Before each increment of collection is performed, the stacks are scanned
to check for any new stack frames that have been created since the last
increment. All objects directly referred to from those stack frames are
added to the working set.
Then the above algorithm is repeated, starting from step 2.


Optimization: reusing fields to save memory
===========================================

Expand Down
2 changes: 1 addition & 1 deletion Lib/test/libregrtest/refleak.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ def get_pooled_int(value):
xml_filename = 'refleak-xml.tmp'
result = None
dash_R_cleanup(fs, ps, pic, zdc, abcs)
support.gc_collect()

for i in rep_range:
support.gc_collect()
current = refleak_helper._hunting_for_refleaks
refleak_helper._hunting_for_refleaks = True
try:
Expand Down
24 changes: 14 additions & 10 deletions Lib/test/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs):
return C
ContainerNoGC = None

try:
import _testinternalcapi
except ImportError:
_testinternalcapi = None

### Support code
###############################################################################

Expand Down Expand Up @@ -1130,6 +1135,7 @@ def setUp(self):
def tearDown(self):
gc.disable()

@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
@requires_gil_enabled("Free threading does not support incremental GC")
# Use small increments to emulate longer running process in a shorter time
@gc_threshold(200, 10)
Expand Down Expand Up @@ -1167,20 +1173,15 @@ def make_ll(depth):
enabled = gc.isenabled()
gc.enable()
olds = []
initial_heap_size = _testinternalcapi.get_tracked_heap_size()
for i in range(20_000):
newhead = make_ll(20)
count += 20
newhead.surprise = head
olds.append(newhead)
if len(olds) == 20:
stats = gc.get_stats()
young = stats[0]
incremental = stats[1]
old = stats[2]
collected = young['collected'] + incremental['collected'] + old['collected']
count += CORRECTION
live = count - collected
self.assertLess(live, 25000)
new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
del olds[:]
if not enabled:
gc.disable()
Expand Down Expand Up @@ -1322,7 +1323,8 @@ def test_refcount_errors(self):
from test.support import gc_collect, SuppressCrashReport
a = [1, 2, 3]
b = [a]
b = [a, a]
a.append(b)
# Avoid coredump when Py_FatalError() calls abort()
SuppressCrashReport().__enter__()
Expand All @@ -1332,6 +1334,8 @@ def test_refcount_errors(self):
# (to avoid deallocating it):
import ctypes
ctypes.pythonapi.Py_DecRef(ctypes.py_object(a))
del a
del b
# The garbage collector should now have a fatal error
# when it reaches the broken object
Expand Down Expand Up @@ -1360,7 +1364,7 @@ def test_refcount_errors(self):
self.assertRegex(stderr,
br'object type name: list')
self.assertRegex(stderr,
br'object repr : \[1, 2, 3\]')
br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]')


class GCTogglingTests(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Add a marking phase to the GC. All objects that can be transitively reached
from builtin modules or the stacks are marked as reachable before cycle
detection. This reduces the amount of work done by the GC by approximately
half.
6 changes: 6 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -2076,6 +2076,11 @@ has_deferred_refcount(PyObject *self, PyObject *op)
return PyBool_FromLong(_PyObject_HasDeferredRefcount(op));
}

/* Return the `gc.heap_size` counter of the interpreter obtained from
 * PyInterpreterState_Get() as a Python int.
 *
 * Takes no arguments (registered as METH_NOARGS). The result is a new
 * reference, or NULL with an exception set if the int cannot be created. */
static PyObject *
get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    /* Read the counter once into the exact type PyLong_FromInt64() takes. */
    int64_t heap_size = PyInterpreterState_Get()->gc.heap_size;
    return PyLong_FromInt64(heap_size);
}

static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
Expand Down Expand Up @@ -2174,6 +2179,7 @@ static PyMethodDef module_functions[] = {
{"get_static_builtin_types", get_static_builtin_types, METH_NOARGS},
{"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS},
{"has_deferred_refcount", has_deferred_refcount, METH_O},
{"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS},
{NULL, NULL} /* sentinel */
};

Expand Down
1 change: 1 addition & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
entry_frame.stackpointer = entry_frame.localsplus;
entry_frame.owner = FRAME_OWNED_BY_CSTACK;
entry_frame.visited = 0;
entry_frame.return_offset = 0;
/* Push frame */
entry_frame.previous = tstate->current_frame;
Expand Down
Loading

0 comments on commit a8dd821

Please sign in to comment.