Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

heap snapshot: add gc roots and gc finalist roots to fix unrooted nodes #52618

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 101 additions & 23 deletions src/gc-heap-snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "gc-heap-snapshot.h"

#include "julia_internal.h"
#include "julia_assert.h"
#include "gc.h"

#include "llvm/ADT/SmallVector.h"
Expand All @@ -12,8 +13,11 @@
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
#include <set>

using std::string;
using std::set;
using std::ostringstream;
using std::pair;
using std::make_pair;
Expand Down Expand Up @@ -116,6 +120,8 @@ struct HeapSnapshot {
DenseMap<void *, size_t> node_ptr_to_index_map;

size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes.
size_t _gc_root_idx = 1; // node index of the GC roots node
size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node
};

// global heap snapshot, mutated by garbage collector
Expand All @@ -128,13 +134,13 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
static inline void _record_gc_edge(const char *edge_type,
jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT;
void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT;
void _add_internal_root(HeapSnapshot *snapshot);
void _add_synthetic_root_entries(HeapSnapshot *snapshot);


JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one)
{
HeapSnapshot snapshot;
_add_internal_root(&snapshot);
_add_synthetic_root_entries(&snapshot);

jl_mutex_lock(&heapsnapshot_lock);

Expand All @@ -156,10 +162,12 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one)
serialize_heap_snapshot((ios_t*)stream, snapshot, all_one);
}

// adds a node at id 0 which is the "uber root":
// a synthetic node which points to all the GC roots.
void _add_internal_root(HeapSnapshot *snapshot)
// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212
// add synthetic nodes for the uber root, the GC roots, and the GC finalizer list roots
void _add_synthetic_root_entries(HeapSnapshot *snapshot)
{
// adds a node at id 0 which is the "uber root":
// a synthetic node which points to all the GC roots.
Node internal_root{
snapshot->node_types.find_or_create_string_id("synthetic"),
snapshot->names.find_or_create_string_id(""), // name
Expand All @@ -170,6 +178,44 @@ void _add_internal_root(HeapSnapshot *snapshot)
SmallVector<Edge, 0>() // outgoing edges
};
snapshot->nodes.push_back(internal_root);

// Add a node for the GC roots
snapshot->_gc_root_idx = snapshot->nodes.size();
Node gc_roots{
snapshot->node_types.find_or_create_string_id("synthetic"),
snapshot->names.find_or_create_string_id("GC roots"), // name
snapshot->_gc_root_idx, // id
0, // size
0, // size_t trace_node_id (unused)
0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached
SmallVector<Edge, 0>() // outgoing edges
};
snapshot->nodes.push_back(gc_roots);
snapshot->nodes.front().edges.push_back(Edge{
snapshot->edge_types.find_or_create_string_id("internal"),
snapshot->names.find_or_create_string_id("GC roots"), // edge label
snapshot->_gc_root_idx // to
});
snapshot->num_edges += 1;

// add a node for the gc finalizer list roots
snapshot->_gc_finlist_root_idx = snapshot->nodes.size();
Node gc_finlist_roots{
snapshot->node_types.find_or_create_string_id("synthetic"),
snapshot->names.find_or_create_string_id("GC finlist roots"), // name
IanButterworth marked this conversation as resolved.
Show resolved Hide resolved
snapshot->_gc_finlist_root_idx, // id
0, // size
0, // size_t trace_node_id (unused)
0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached
SmallVector<Edge, 0>() // outgoing edges
};
snapshot->nodes.push_back(gc_finlist_roots);
snapshot->nodes.front().edges.push_back(Edge{
snapshot->edge_types.find_or_create_string_id("internal"),
snapshot->names.find_or_create_string_id("GC finlist roots"), // edge label
snapshot->_gc_finlist_root_idx // to
});
snapshot->num_edges += 1;
}

// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597
Expand Down Expand Up @@ -327,6 +373,26 @@ void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT
_record_gc_just_edge("internal", internal_root, to_node_idx, edge_label);
}

void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT
{
record_node_to_gc_snapshot(root);

auto from_node_idx = g_snapshot->_gc_root_idx;
auto to_node_idx = record_node_to_gc_snapshot(root);
auto edge_label = g_snapshot->names.find_or_create_string_id(name);
_record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label);
}

void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT
{
auto from_node_idx = g_snapshot->_gc_finlist_root_idx;
auto to_node_idx = record_node_to_gc_snapshot(obj);
ostringstream ss;
ss << "finlist-" << index;
auto edge_label = g_snapshot->names.find_or_create_string_id(ss.str());
_record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label);
}

// Add a node to the heap snapshot representing a Julia stack frame.
// Each task points at a stack frame, which points at the stack frame of
// the function it's currently calling, forming a linked list.
Expand Down Expand Up @@ -393,27 +459,19 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void
g_snapshot->names.find_or_create_string_id(path));
}

void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_binding_t *binding) JL_NOTSAFEPOINT
void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT
{
jl_globalref_t *globalref = binding->globalref;
jl_sym_t *name = globalref->name;
auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module);
auto to_node_idx = record_pointer_to_gc_snapshot(binding, sizeof(jl_binding_t), jl_symbol_name(name));

jl_value_t *value = jl_atomic_load_relaxed(&binding->value);
auto value_idx = value ? record_node_to_gc_snapshot(value) : 0;
jl_value_t *ty = jl_atomic_load_relaxed(&binding->ty);
auto ty_idx = ty ? record_node_to_gc_snapshot(ty) : 0;
auto globalref_idx = record_node_to_gc_snapshot((jl_value_t*)globalref);

auto to_bindings_idx = record_node_to_gc_snapshot(bindings);
auto to_bindingkeyset_idx = record_node_to_gc_snapshot(bindingkeyset);
auto &from_node = g_snapshot->nodes[from_node_idx];
auto &to_node = g_snapshot->nodes[to_node_idx];

_record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("<native>"));
if (value_idx) _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value"));
if (ty_idx) _record_gc_just_edge("internal", to_node, ty_idx, g_snapshot->names.find_or_create_string_id("ty"));
if (globalref_idx) _record_gc_just_edge("internal", to_node, globalref_idx, g_snapshot->names.find_or_create_string_id("globalref"));
}
if (to_bindings_idx > 0) {
_record_gc_just_edge("internal", from_node, to_bindings_idx, g_snapshot->names.find_or_create_string_id("bindings"));
}
if (to_bindingkeyset_idx > 0) {
_record_gc_just_edge("internal", from_node, to_bindingkeyset_idx, g_snapshot->names.find_or_create_string_id("bindingkeyset"));
}
}

void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -492,6 +550,8 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one

ios_printf(stream, "\"nodes\":[");
bool first_node = true;
// use a set to track the nodes that do not have parents
set<size_t> orphans;
for (const auto &from_node : snapshot.nodes) {
if (first_node) {
first_node = false;
Expand All @@ -508,6 +568,14 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
from_node.edges.size(),
from_node.trace_node_id,
from_node.detachedness);
if (from_node.id != snapshot._gc_root_idx && from_node.id != snapshot._gc_finlist_root_idx) {
// find the node index from the node object pointer
void * ptr = (void*)from_node.id;
size_t n_id = snapshot.node_ptr_to_index_map[ptr];
orphans.insert(n_id);
} else {
orphans.insert(from_node.id);
}
}
ios_printf(stream, "],\n");

Expand All @@ -525,6 +593,12 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
edge.type,
edge.name_or_index,
edge.to_node * k_node_number_of_fields);
auto n_id = edge.to_node;
auto it = orphans.find(n_id);
if (it != orphans.end()) {
// remove the node from the orphans if it has at least one incoming edge
orphans.erase(it);
}
}
}
ios_printf(stream, "],\n"); // end "edges"
Expand All @@ -534,4 +608,8 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
snapshot.names.print_json_array(stream, true);

ios_printf(stream, "}");

// remove the uber node from the orphans
orphans.erase(0);
assert(orphans.size() == 0 && "all nodes except the uber node should have at least one incoming edge");
}
33 changes: 28 additions & 5 deletions src/gc-heap-snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_N
void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT;
void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT;
void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT;
void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT;
void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT;
// Used for objects managed by GC, but which aren't exposed in the julia object, so have no
// field or index. i.e. they're not reachable from julia code, but we _will_ hit them in
// the GC mark phase (so we can check their type tag to get the size).
void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT;
// Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the
// size of the object, even though we're never going to mark that object.
void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT;

// Used for objects that are reachable from the GC roots
void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT;
// Used for objects that are reachable from the finalizer list
void _gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT;

extern int gc_heap_snapshot_enabled;
extern int prev_sweep_full;
Expand Down Expand Up @@ -60,6 +63,12 @@ static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL
_gc_heap_snapshot_record_root(root, name);
}
}
static inline void gc_heap_snapshot_record_array_edge_index(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT
{
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && from != NULL && to != NULL)) {
_gc_heap_snapshot_record_array_edge(from, to, index);
}
}
static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT
{
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
Expand All @@ -73,10 +82,10 @@ static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_valu
}
}

static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT
static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT
{
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
_gc_heap_snapshot_record_module_to_binding(module, binding);
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full) && bindings != NULL && bindingkeyset != NULL) {
_gc_heap_snapshot_record_module_to_binding(module, bindings, bindingkeyset);
}
}

Expand All @@ -94,6 +103,20 @@ static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* t
}
}

static inline void gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT
{
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && root != NULL)) {
_gc_heap_snapshot_record_gc_roots(root, name);
}
}

static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT
{
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && finlist != NULL)) {
_gc_heap_snapshot_record_finlist(finlist, index);
}
}

// ---------------------------------------------------------------------
// Functions to call from Julia to take heap snapshot
// ---------------------------------------------------------------------
Expand Down
33 changes: 27 additions & 6 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2333,9 +2333,10 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch
break;
}
case GC_finlist_chunk: {
jl_value_t *fl_parent = c->parent;
jl_value_t **fl_begin = c->begin;
jl_value_t **fl_end = c->end;
gc_mark_finlist_(mq, fl_begin, fl_end);
gc_mark_finlist_(mq, fl_parent, fl_begin, fl_end);
break;
}
default: {
Expand Down Expand Up @@ -2433,6 +2434,7 @@ STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent
jl_value_t *bindingkeyset = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindingkeyset);
gc_assert_parent_validity((jl_value_t *)mb_parent, bindingkeyset);
gc_try_claim_and_push(mq, bindingkeyset, &nptr);
gc_heap_snapshot_record_module_to_binding(mb_parent, bindings, bindingkeyset);
gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->parent);
gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->parent, &nptr);
size_t nusings = mb_parent->usings.len;
Expand All @@ -2451,7 +2453,7 @@ STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent
}
}

void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end)
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end)
{
jl_value_t *new_obj;
// Decide whether need to chunk finlist
Expand All @@ -2461,8 +2463,10 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
gc_chunkqueue_push(mq, &c);
fl_end = fl_begin + GC_CHUNK_BATCH_SIZE;
}
size_t i = 0;
for (; fl_begin < fl_end; fl_begin++) {
new_obj = *fl_begin;
jl_value_t **slot = fl_begin;
new_obj = *slot;
if (__unlikely(new_obj == NULL))
continue;
if (gc_ptr_tag(new_obj, 1)) {
Expand All @@ -2473,6 +2477,13 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
if (gc_ptr_tag(new_obj, 2))
continue;
gc_try_claim_and_push(mq, new_obj, NULL);
if (fl_parent != NULL) {
gc_heap_snapshot_record_array_edge(fl_parent, slot);
} else {
// This is a list of objects following the same format as a finlist
// if `fl_parent` is NULL
gc_heap_snapshot_record_finlist(new_obj, ++i);
}
}
}

Expand All @@ -2484,7 +2495,7 @@ void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start)
return;
jl_value_t **fl_begin = (jl_value_t **)list->items + start;
jl_value_t **fl_end = (jl_value_t **)list->items + len;
gc_mark_finlist_(mq, fl_begin, fl_end);
gc_mark_finlist_(mq, NULL, fl_begin, fl_end);
}

JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
Expand Down Expand Up @@ -3133,28 +3144,38 @@ static void gc_mark_roots(jl_gc_markqueue_t *mq)
{
// modules
gc_try_claim_and_push(mq, jl_main_module, NULL);
gc_heap_snapshot_record_root((jl_value_t*)jl_main_module, "main_module");
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_main_module, "main_module");
// invisible builtin values
gc_try_claim_and_push(mq, jl_an_empty_vec_any, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_an_empty_vec_any, "an_empty_vec_any");
gc_try_claim_and_push(mq, jl_module_init_order, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_module_init_order, "module_init_order");
for (size_t i = 0; i < jl_current_modules.size; i += 2) {
if (jl_current_modules.table[i + 1] != HT_NOTFOUND) {
gc_try_claim_and_push(mq, jl_current_modules.table[i], NULL);
gc_heap_snapshot_record_root((jl_value_t*)jl_current_modules.table[i], "top level module");
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_current_modules.table[i], "top level module");
}
}
gc_try_claim_and_push(mq, jl_anytuple_type_type, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_anytuple_type_type, "anytuple_type_type");
for (size_t i = 0; i < N_CALL_CACHE; i++) {
jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
gc_try_claim_and_push(mq, v, NULL);
gc_heap_snapshot_record_array_edge_index((jl_value_t*)jl_anytuple_type_type, (jl_value_t*)v, i);
}
gc_try_claim_and_push(mq, jl_all_methods, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_all_methods, "all_methods");
gc_try_claim_and_push(mq, _jl_debug_method_invalidation, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)_jl_debug_method_invalidation, "debug_method_invalidation");
// constants
gc_try_claim_and_push(mq, jl_emptytuple_type, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_emptytuple_type, "emptytuple_type");
gc_try_claim_and_push(mq, cmpswap_names, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)cmpswap_names, "cmpswap_names");
gc_try_claim_and_push(mq, jl_global_roots_list, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_list, "global_roots_list");
gc_try_claim_and_push(mq, jl_global_roots_keyset, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_keyset, "global_roots_keyset");
}

// find unmarked objects that need to be finalized from the finalizer list "list".
Expand Down
2 changes: 1 addition & 1 deletion src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ extern uv_sem_t gc_sweep_assists_needed;
extern _Atomic(int) gc_n_threads_marking;
extern _Atomic(int) gc_n_threads_sweeping;
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
void gc_mark_loop_serial(jl_ptls_t ptls);
Expand Down