Refactoring block_for_gc #100

Merged: 6 commits, Sep 12, 2023. Showing changes from 4 commits.
134 changes: 78 additions & 56 deletions julia/mmtk_julia.c
@@ -21,6 +21,9 @@ extern void mmtk_store_obj_size_c(void* obj, size_t size);
 extern void jl_gc_free_array(jl_array_t *a);
 extern size_t mmtk_get_obj_size(void* obj);
 extern void jl_rng_split(uint64_t to[JL_RNG_SIZE], uint64_t from[JL_RNG_SIZE]);
+extern jl_mutex_t finalizers_lock;
+extern void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
+extern void mmtk_block_thread_for_gc(void);

 extern void* new_mutator_iterator(void);
 extern jl_ptls_t get_next_mutator_tls(void*);
@@ -138,65 +141,89 @@ void mmtk_exit_from_safepoint(int8_t old_state) {
     jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
 }

-// all threads pass here and if there is another thread doing GC,
-// it will block until GC is done
-// that thread simply exits from block_for_gc without executing finalizers
-// when executing finalizers do not let another thread do GC (set a variable such that while that variable is true, no GC can be done)
-int8_t set_gc_initial_state(void* ptls_raw)
+// based on jl_gc_collect from gc.c
+JL_DLLEXPORT void jl_gc_prepare_to_collect(void)

[Review comment · Member]
Looks like this function just duplicates jl_gc_collect. Have you considered moving the function to gc-common.c, and letting Julia and MMTk each implement a different _jl_gc_collect?

[Reply · Contributor, Author]
The problem is that triggering GC manually in Julia is done via a call to jl_gc_collect (https://github.com/mmtk/julia/blob/83796a7d38878faa9afdb94ce84b6da00035b336/base/gcutils.jl#L129). For MMTk, we want that function to call into mmtk_handle_user_collection_request as it currently does. That's why I did the duplication, with jl_gc_prepare_to_collect doing the actual preparation that is done in jl_gc_collect from gc.c. We could potentially reuse jl_gc_collect for what I'm doing by passing a different argument (e.g. -1), but we'd still have the duplication - the code for Julia in gc.c and the code for MMTk in mmtk-gc.c.

(A sketch of the gc-common.c alternative discussed here follows this file's diff.)

 {
-    jl_ptls_t ptls = (jl_ptls_t) ptls_raw;
-    int8_t old_state = jl_atomic_load_relaxed(&((jl_ptls_t)ptls)->gc_state);
-    jl_atomic_store_release(&((jl_ptls_t)ptls)->gc_state, JL_GC_STATE_WAITING);
-    if (!jl_safepoint_start_gc()) {
-        jl_gc_state_set((jl_ptls_t)ptls, old_state, JL_GC_STATE_WAITING);
-        return -1;
+    // FIXME: set to JL_GC_AUTO since we're calling it from mmtk
+    // maybe just remove this?
+    JL_PROBE_GC_BEGIN(JL_GC_AUTO);
+
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
+        jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
+        return;
     }
-    return old_state;
-}

-void set_gc_final_state(int8_t old_state)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    jl_safepoint_end_gc();
-    jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
-}
-
-void set_gc_old_state(int8_t old_state)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    jl_atomic_store_release(&ptls->gc_state, old_state);
-}
+    int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
+    jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
+    // `jl_safepoint_start_gc()` makes sure only one thread can run the GC.
+    uint64_t t0 = jl_hrtime();
+    if (!jl_safepoint_start_gc()) {
+        // either another thread is running GC, or the GC got disabled just now.
+        jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+        return;
+    }

-void wait_for_the_world(void)
-{
+    JL_TIMING_SUSPEND_TASK(GC, ct);
+    JL_TIMING(GC, GC);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    // Now we are ready to wait for other threads to hit the safepoint,
+    // we can do a few things that doesn't require synchronization.
+    //
+    // We must sync here with the tls_lock operations, so that we have a
+    // seq-cst order between these events now we know that either the new
+    // thread must run into our safepoint flag or we must observe the
+    // existence of the thread in the jl_n_threads count.
+    //
+    // TODO: concurrently queue objects
+    jl_fence();
     gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
     gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    assert(gc_n_threads);
-    if (gc_n_threads > 1)
-        jl_wake_libuv();
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 == NULL)
-            continue;
-        // This acquire load pairs with the release stores
-        // in the signal handler of safepoint so we are sure that
-        // all the stores on those threads are visible.
-        // We're currently also using atomic store release in mutator threads
-        // (in jl_gc_state_set), but we may want to use signals to flush the
-        // memory operations on those threads lazily instead.
-        while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state))
-            jl_cpu_pause(); // yield?
+    jl_gc_wait_for_the_world(gc_all_tls_states, gc_n_threads);
+    JL_PROBE_GC_STOP_THE_WORLD();
+
+    uint64_t t1 = jl_hrtime();
+    uint64_t duration = t1 - t0;
+    if (duration > gc_num.max_time_to_safepoint)
+        gc_num.max_time_to_safepoint = duration;
+    gc_num.time_to_safepoint = duration;
+    gc_num.total_time_to_safepoint += duration;
+
+    if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock
+#ifndef __clang_gcanalyzer__
+        mmtk_block_thread_for_gc();
+#endif
+        JL_UNLOCK_NOGC(&finalizers_lock);
     }
-}

-void set_jl_last_err(int e)
-{
-    errno = e;
-}
+    gc_n_threads = 0;
+    gc_all_tls_states = NULL;
+    jl_safepoint_end_gc();
+    jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+    JL_PROBE_GC_END();

-int get_jl_last_err(void)
-{
-    return errno;
+    // Only disable finalizers on current thread
+    // Doing this on all threads is racy (it's impossible to check
+    // or wait for finalizers on other threads without dead lock).
+    if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
+        JL_TIMING(GC, GC_Finalizers);
+        run_finalizers(ct);
+    }
+    JL_PROBE_GC_FINALIZER();
+
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
 }

 extern void run_finalizers(jl_task_t *ct);
@@ -401,12 +428,6 @@ Julia_Upcalls mmtk_upcalls = (Julia_Upcalls) {
     .scan_julia_exc_obj = scan_julia_exc_obj,
     .get_stackbase = get_stackbase,
     // .run_finalizer_function = run_finalizer_function,
-    .get_jl_last_err = get_jl_last_err,
-    .set_jl_last_err = set_jl_last_err,
-    .wait_for_the_world = wait_for_the_world,
-    .set_gc_initial_state = set_gc_initial_state,
-    .set_gc_final_state = set_gc_final_state,
-    .set_gc_old_state = set_gc_old_state,
     .mmtk_jl_run_finalizers = mmtk_jl_run_finalizers,
     .jl_throw_out_of_memory_error = jl_throw_out_of_memory_error,
     .sweep_malloced_array = mmtk_sweep_malloced_arrays,
@@ -421,4 +442,5 @@ Julia_Upcalls mmtk_upcalls = (Julia_Upcalls) {
     .arraylist_grow = (void (*)(void*, long unsigned int))arraylist_grow,
     .get_jl_gc_have_pending_finalizers = get_jl_gc_have_pending_finalizers,
     .scan_vm_specific_roots = scan_vm_specific_roots,
+    .prepare_to_collect = jl_gc_prepare_to_collect,
 };
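
The review thread above asks whether the shared preparation could move into gc-common.c, with stock Julia and MMTk each supplying their own _jl_gc_collect. As an illustration of the shape of that idea only (the real change would be C code in the Julia tree), here is a minimal Rust-flavored sketch; `GcBackend`, `gc_collect_common`, and both backend types are hypothetical names, not part of either repository:

    // Hypothetical sketch: a shared collect entry point dispatching to a
    // backend-specific hook, mirroring the proposed gc-common.c split.
    trait GcBackend {
        fn collect(&self, user_triggered: bool);
    }

    struct StockGc; // stand-in for the stock Julia GC (gc.c)
    struct MmtkGc;  // stand-in for the MMTk backend (mmtk-gc.c)

    impl GcBackend for StockGc {
        fn collect(&self, _user_triggered: bool) {
            println!("stock: run the _jl_gc_collect logic from gc.c");
        }
    }

    impl GcBackend for MmtkGc {
        fn collect(&self, user_triggered: bool) {
            println!("mmtk: forward to mmtk_handle_user_collection_request ({user_triggered})");
        }
    }

    // The preparation jl_gc_prepare_to_collect copies from jl_gc_collect
    // (safepoint entry/exit, timing, errno save/restore) would live here once.
    fn gc_collect_common(backend: &dyn GcBackend, user_triggered: bool) {
        // ... enter safepoint, stop the world ...
        backend.collect(user_triggered);
        // ... end safepoint, run finalizers, restore errno ...
    }

    fn main() {
        gc_collect_common(&StockGc, false);
        gc_collect_common(&MmtkGc, true);
    }

Either way, the condvar handshake shown later in this PR is unchanged; the open question is only where the shared shell lives.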
4 changes: 0 additions & 4 deletions julia/mmtk_julia.h
@@ -3,10 +3,6 @@

 extern Julia_Upcalls mmtk_upcalls;

-int get_jl_last_err(void);
-void set_jl_last_err(int e);
-void set_gc_final_state(int8_t old_state);
 int set_gc_running_state(jl_ptls_t ptls);
-void set_gc_old_state(int8_t old_state);
 void mmtk_jl_gc_run_all_finalizers(void);
 void mmtk_jl_run_pending_finalizers(void* tls);
4 changes: 2 additions & 2 deletions mmtk/Cargo.lock

(Generated file; diff not rendered by default.)

4 changes: 2 additions & 2 deletions mmtk/Cargo.toml
@@ -9,8 +9,8 @@ edition = "2018"
 # Metadata for the Julia repository
 [package.metadata.julia]
 # Our CI matches the following line and extract mmtk/julia. If this line is updated, please check ci yaml files and make sure it works.
-julia_repo = "https://github.com/mmtk/julia.git"
-julia_version = "83796a7d38878faa9afdb94ce84b6da00035b336"
+julia_repo = "https://github.com/udesou/julia.git"
+julia_version = "66a49ccf864bded60b232140f57c69059a503f07"

 [lib]
 crate-type = ["cdylib"]
7 changes: 1 addition & 6 deletions mmtk/api/mmtk.h
@@ -70,12 +70,6 @@ extern uintptr_t JULIA_MALLOC_BYTES;
 typedef struct {
     void (* scan_julia_exc_obj) (void* obj, void* closure, ProcessEdgeFn process_edge);
     void* (* get_stackbase) (int16_t tid);
-    int (* get_jl_last_err) (void);
-    void (* set_jl_last_err) (int e);
-    void (* wait_for_the_world) (void);
-    int8_t (* set_gc_initial_state) (void* tls);
-    void (* set_gc_final_state) (int8_t old_state);
-    void (* set_gc_old_state) (int8_t old_state);
     void (* mmtk_jl_run_finalizers) (void* tls);
     void (* jl_throw_out_of_memory_error) (void);
     void (* sweep_malloced_array) (void);
@@ -90,6 +84,7 @@ typedef struct {
     void (*arraylist_grow)(void* a, size_t n);
     int* (*get_jl_gc_have_pending_finalizers)(void);
     void (*scan_vm_specific_roots)(RootsWorkClosure* closure);
+    void (*prepare_to_collect)(void);
 } Julia_Upcalls;

 /**
7 changes: 4 additions & 3 deletions mmtk/src/api.rs
@@ -19,6 +19,7 @@ use mmtk::util::{Address, ObjectReference, OpaquePointer};
 use mmtk::AllocationSemantics;
 use mmtk::Mutator;
 use std::ffi::CStr;
+use std::sync::atomic::AtomicIsize;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};

 #[no_mangle]
@@ -255,7 +256,7 @@ pub extern "C" fn mmtk_disable_collection() {
     }

     // if user has triggered GC, wait until GC is finished
-    while AtomicBool::load(&USER_TRIGGERED_GC, Ordering::SeqCst)
+    while AtomicIsize::load(&USER_TRIGGERED_GC, Ordering::SeqCst) != 0
         || AtomicBool::load(&BLOCK_FOR_GC, Ordering::SeqCst)
     {
         info!("Waiting for a triggered gc to finish...");
@@ -295,9 +296,9 @@ pub extern "C" fn mmtk_modify_check(object: ObjectReference) {

 #[no_mangle]
 pub extern "C" fn mmtk_handle_user_collection_request(tls: VMMutatorThread, collection: u8) {
-    AtomicBool::store(&USER_TRIGGERED_GC, true, Ordering::SeqCst);
+    AtomicIsize::fetch_add(&USER_TRIGGERED_GC, 1, Ordering::SeqCst);
     if AtomicBool::load(&DISABLED_GC, Ordering::SeqCst) {
-        AtomicBool::store(&USER_TRIGGERED_GC, false, Ordering::SeqCst);
+        AtomicIsize::fetch_add(&USER_TRIGGERED_GC, -1, Ordering::SeqCst);
         return;
     }
     // See jl_gc_collection_t
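
The change from AtomicBool to AtomicIsize turns USER_TRIGGERED_GC into a counter of in-flight requests: each requesting thread increments it, the disabled-GC early-out decrements only its own request, and mmtk_disable_collection waits until the counter reaches zero. With a plain bool, one thread's store(false) could erase another thread's still-pending request. A minimal sketch under that reading of the diff (the function below is a stand-in, not the binding's exported symbol):

    use std::sync::atomic::{AtomicIsize, Ordering};

    // Counter of in-flight user GC requests (mirrors USER_TRIGGERED_GC).
    static USER_TRIGGERED_GC: AtomicIsize = AtomicIsize::new(0);

    // Stand-in for mmtk_handle_user_collection_request.
    fn handle_user_collection_request(gc_disabled: bool) {
        USER_TRIGGERED_GC.fetch_add(1, Ordering::SeqCst);
        if gc_disabled {
            // Withdraw only this thread's request; other threads'
            // pending requests stay visible.
            USER_TRIGGERED_GC.fetch_add(-1, Ordering::SeqCst);
            return;
        }
        // ... trigger the collection; the counter is reset to 0 once the GC
        // finishes (see mmtk_block_thread_for_gc in collection.rs below).
    }

    fn main() {
        handle_user_collection_request(true);
        assert_eq!(USER_TRIGGERED_GC.load(Ordering::SeqCst), 0);
    }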
65 changes: 21 additions & 44 deletions mmtk/src/collection.rs
@@ -5,7 +5,7 @@ use mmtk::util::alloc::AllocationError;
 use mmtk::util::opaque_pointer::*;
 use mmtk::vm::{Collection, GCThreadContext};
 use mmtk::Mutator;
-use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicIsize, AtomicU64, Ordering};

 use crate::{BLOCK_FOR_GC, STW_COND, WORLD_HAS_STOPPED};

@@ -63,52 +63,11 @@ impl Collection<JuliaVM> for VMCollection {
         trace!("Resuming mutators.");
     }

-    fn block_for_gc(tls: VMMutatorThread) {
+    fn block_for_gc(_tls: VMMutatorThread) {
         info!("Triggered GC!");

-        AtomicBool::store(&BLOCK_FOR_GC, true, Ordering::SeqCst);
+        unsafe { ((*UPCALLS).prepare_to_collect)() };

-        let tls_ptr = match tls {
-            VMMutatorThread(t) => match t {
-                VMThread(ptr) => ptr,
-            },
-        };
-
-        let old_state = unsafe { ((*UPCALLS).set_gc_initial_state)(tls_ptr) };
-
-        if old_state == -1 {
-            info!("Multiple threads entered GC simultaneously.");
-            return;
-        }
-
-        unsafe { ((*UPCALLS).wait_for_the_world)() };
-
-        let last_err = unsafe { ((*UPCALLS).get_jl_last_err)() };
-
-        {
-            let &(ref lock, ref cvar) = &*STW_COND.clone();
-            let mut count = lock.lock().unwrap();
-
-            info!("Blocking for GC!");
-
-            AtomicBool::store(&WORLD_HAS_STOPPED, true, Ordering::SeqCst);
-
-            while AtomicBool::load(&BLOCK_FOR_GC, Ordering::SeqCst) {
-                count = cvar.wait(count).unwrap();
-            }
-        }
-
-        info!("GC Done!");
-        if AtomicBool::load(&USER_TRIGGERED_GC, Ordering::SeqCst) {
-            AtomicBool::store(&USER_TRIGGERED_GC, false, Ordering::SeqCst);
-        }
-
-        unsafe { ((*UPCALLS).set_gc_final_state)(old_state) };
-
-        info!("Finalizing objects!");
-        unsafe { ((*UPCALLS).mmtk_jl_run_finalizers)(tls_ptr) };
-
-        unsafe { ((*UPCALLS).set_jl_last_err)(last_err) };
+        info!("Finished blocking mutator for GC!");
     }

@@ -149,3 +108,21 @@ pub fn is_current_gc_nursery() -> bool {
         None => false,
     }
 }
+
+#[no_mangle]
+pub extern "C" fn mmtk_block_thread_for_gc() {
+    AtomicBool::store(&BLOCK_FOR_GC, true, Ordering::SeqCst);
+
+    let &(ref lock, ref cvar) = &*STW_COND.clone();
+    let mut count = lock.lock().unwrap();
+
+    info!("Blocking for GC!");
+
+    AtomicBool::store(&WORLD_HAS_STOPPED, true, Ordering::SeqCst);
+
+    while AtomicBool::load(&BLOCK_FOR_GC, Ordering::SeqCst) {
+        count = cvar.wait(count).unwrap();
+    }
+
+    AtomicIsize::store(&USER_TRIGGERED_GC, 0, Ordering::SeqCst);
+}
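
mmtk_block_thread_for_gc parks the triggering thread on STW_COND until a GC worker clears BLOCK_FOR_GC; the waking side, resume_mutators, appears in this diff only as trailing context. Below is a self-contained sketch of the round trip, assuming the resume path clears the flag and notifies while holding the same lock, as the binding's pre-existing resume_mutators does; all names here are stand-ins:

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Arc, Condvar, Mutex};
    use std::thread;
    use std::time::Duration;

    // Stand-in for the binding's BLOCK_FOR_GC static.
    static BLOCK_FOR_GC: AtomicBool = AtomicBool::new(false);

    fn main() {
        // Stand-in for STW_COND: a (Mutex, Condvar) pair shared by both sides.
        let stw_cond = Arc::new((Mutex::new(0usize), Condvar::new()));

        // mmtk_block_thread_for_gc() sets the flag first...
        BLOCK_FOR_GC.store(true, Ordering::SeqCst);

        // GC worker stand-in: "collects", then resumes mutators. Clearing the
        // flag and notifying happen under the lock, so a waiter cannot check
        // the flag and then miss the notification.
        let worker = {
            let stw = Arc::clone(&stw_cond);
            thread::spawn(move || {
                thread::sleep(Duration::from_millis(50)); // pretend to collect
                let (lock, cvar) = &*stw;
                let _guard = lock.lock().unwrap();
                BLOCK_FOR_GC.store(false, Ordering::SeqCst);
                cvar.notify_all();
            })
        };

        // ...then waits until the worker clears it (wait() releases the lock).
        let (lock, cvar) = &*stw_cond;
        let mut count = lock.lock().unwrap();
        while BLOCK_FOR_GC.load(Ordering::SeqCst) {
            count = cvar.wait(count).unwrap();
        }
        drop(count);

        worker.join().unwrap();
        println!("GC done; mutator resumed");
    }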
10 changes: 3 additions & 7 deletions mmtk/src/lib.rs
@@ -12,6 +12,7 @@ use mmtk::MMTK;

 use std::collections::HashMap;
 use std::ptr::null_mut;
+use std::sync::atomic::AtomicIsize;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};

@@ -76,7 +77,7 @@ pub static WORLD_HAS_STOPPED: AtomicBool = AtomicBool::new(false);
 pub static DISABLED_GC: AtomicBool = AtomicBool::new(false);

 #[no_mangle]
-pub static USER_TRIGGERED_GC: AtomicBool = AtomicBool::new(false);
+pub static USER_TRIGGERED_GC: AtomicIsize = AtomicIsize::new(0);

 lazy_static! {
     pub static ref STW_COND: Arc<(Mutex<usize>, Condvar)> =
@@ -98,12 +99,6 @@ pub struct Julia_Upcalls {
     pub scan_julia_exc_obj:
         extern "C" fn(obj: Address, closure: Address, process_edge: ProcessEdgeFn),
     pub get_stackbase: extern "C" fn(tid: u16) -> usize,
-    pub get_jl_last_err: extern "C" fn() -> u32,
-    pub set_jl_last_err: extern "C" fn(errno: u32),
-    pub wait_for_the_world: extern "C" fn(),
-    pub set_gc_initial_state: extern "C" fn(tls: OpaquePointer) -> i8,
-    pub set_gc_final_state: extern "C" fn(old_state: i8),
-    pub set_gc_old_state: extern "C" fn(old_state: i8),
     pub mmtk_jl_run_finalizers: extern "C" fn(tls: OpaquePointer),
     pub jl_throw_out_of_memory_error: extern "C" fn(),
@@ -118,6 +113,7 @@ pub struct Julia_Upcalls {
     pub arraylist_grow: extern "C" fn(Address, usize),
     pub get_jl_gc_have_pending_finalizers: extern "C" fn() -> *mut i32,
     pub scan_vm_specific_roots: extern "C" fn(closure: *mut crate::edges::RootsWorkClosure),
+    pub prepare_to_collect: extern "C" fn(),
 }

 pub static mut UPCALLS: *const Julia_Upcalls = null_mut();
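
For reference, this is how an upcall crosses the FFI boundary: the C side fills the Julia_Upcalls table (mmtk_upcalls in mmtk_julia.c) and hands the binding a pointer to it, and block_for_gc calls through that pointer. A reduced, self-contained sketch with only the field added by this PR; the registration in main is a stand-in for what the C side does at startup, and it assumes the struct is laid out C-compatibly (#[repr(C)]) with fields in the same order as the C definition:

    use std::ptr::null_mut;

    // Reduced stand-in for Julia_Upcalls with only the new field.
    #[repr(C)]
    pub struct JuliaUpcalls {
        pub prepare_to_collect: extern "C" fn(),
    }

    pub static mut UPCALLS: *const JuliaUpcalls = null_mut();

    // Stand-in for the C-side jl_gc_prepare_to_collect.
    extern "C" fn fake_prepare_to_collect() {
        println!("C side would stop the world and call mmtk_block_thread_for_gc here");
    }

    fn main() {
        static TABLE: JuliaUpcalls = JuliaUpcalls {
            prepare_to_collect: fake_prepare_to_collect,
        };
        unsafe {
            UPCALLS = &TABLE; // done once by the C side at startup
            ((*UPCALLS).prepare_to_collect)(); // the call block_for_gc now makes
        }
    }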