Skip to content

Commit

Permalink
[1/4 for #2365, #2671] Fix create/kill race with schedulers and tasks…
Browse files Browse the repository at this point in the history
… during rust_kernel::fail
  • Loading branch information
bblum committed Jul 20, 2012
1 parent f55999f commit 5bb4a12
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 32 deletions.
12 changes: 8 additions & 4 deletions src/rt/rust_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ rust_kernel::rust_kernel(rust_env *env) :
max_port_id(1),
rval(0),
max_sched_id(1),
killed(false),
sched_reaper(this),
osmain_driver(NULL),
non_weak_tasks(0),
Expand Down Expand Up @@ -103,7 +104,8 @@ rust_kernel::create_scheduler(rust_sched_launcher_factory *launchfac,
id = max_sched_id++;
assert(id != INTPTR_MAX && "Hit the maximum scheduler id");
sched = new (this, "rust_scheduler")
rust_scheduler(this, num_threads, id, allow_exit, launchfac);
rust_scheduler(this, num_threads, id, allow_exit, killed,
launchfac);
bool is_new = sched_table
.insert(std::pair<rust_sched_id,
rust_scheduler*>(id, sched)).second;
Expand Down Expand Up @@ -197,6 +199,10 @@ rust_kernel::fail() {
#endif
// Copy the list of schedulers so that we don't hold the lock while
// running kill_all_tasks.
// I think this only needs to be done by one task ever; as it is,
// multiple tasks invoking kill_all might get here. Currently libcore
// ensures only one task will ever invoke it, but this would really be
// fine either way, so I'm leaving it as it is. -- bblum
// FIXME (#2671): There's a lot that happens under kill_all_tasks,
// and I don't know that holding sched_lock here is ok, but we need
// to hold the sched lock to prevent the scheduler from being
Expand All @@ -205,15 +211,13 @@ rust_kernel::fail() {
std::vector<rust_scheduler*> scheds;
{
scoped_lock with(sched_lock);
killed = true;
for (sched_map::iterator iter = sched_table.begin();
iter != sched_table.end(); iter++) {
scheds.push_back(iter->second);
}
}

// FIXME (#2671): This is not a foolproof way to kill all tasks
// while ensuring that no new tasks or schedulers are created in the
// meantime that keep the scheduler alive.
for (std::vector<rust_scheduler*>::iterator iter = scheds.begin();
iter != scheds.end(); iter++) {
(*iter)->kill_all_tasks();
Expand Down
6 changes: 5 additions & 1 deletion src/rt/rust_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class rust_kernel {
lock_and_signal rval_lock;
int rval;

// Protects max_sched_id and sched_table, join_list
// Protects max_sched_id and sched_table, join_list, killed
lock_and_signal sched_lock;
// The next scheduler id
rust_sched_id max_sched_id;
Expand All @@ -81,6 +81,10 @@ class rust_kernel {
sched_map sched_table;
// A list of scheduler ids that are ready to exit
std::vector<rust_sched_id> join_list;
// Whether or not the runtime has to die (triggered when the root/main
// task group fails). This propagates to all new schedulers and tasks
// created after it is set.
bool killed;

rust_sched_reaper sched_reaper;
// The single-threaded scheduler that uses the main thread
Expand Down
23 changes: 13 additions & 10 deletions src/rt/rust_sched_launcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,36 @@

const size_t SCHED_STACK_SIZE = 1024*100;

rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id)
rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id,
bool killed)
: kernel(sched->kernel),
sched_loop(sched, id),
sched_loop(sched, id, killed),
driver(&sched_loop) {
}

rust_thread_sched_launcher::rust_thread_sched_launcher(rust_scheduler *sched,
int id)
: rust_sched_launcher(sched, id),
int id, bool killed)
: rust_sched_launcher(sched, id, killed),
rust_thread(SCHED_STACK_SIZE) {
}

rust_manual_sched_launcher::rust_manual_sched_launcher(rust_scheduler *sched,
int id)
: rust_sched_launcher(sched, id) {
int id, bool killed)
: rust_sched_launcher(sched, id, killed) {
}

rust_sched_launcher *
rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id) {
rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id,
bool killed) {
return new(sched->kernel, "rust_thread_sched_launcher")
rust_thread_sched_launcher(sched, id);
rust_thread_sched_launcher(sched, id, killed);
}

rust_sched_launcher *
rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id) {
rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id,
bool killed) {
assert(launcher == NULL && "I can only track one sched_launcher");
launcher = new(sched->kernel, "rust_manual_sched_launcher")
rust_manual_sched_launcher(sched, id);
rust_manual_sched_launcher(sched, id, killed);
return launcher;
}
14 changes: 8 additions & 6 deletions src/rt/rust_sched_launcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class rust_sched_launcher : public kernel_owned<rust_sched_launcher> {
rust_sched_driver driver;

public:
rust_sched_launcher(rust_scheduler *sched, int id);
rust_sched_launcher(rust_scheduler *sched, int id, bool killed);
virtual ~rust_sched_launcher() { }

virtual void start() = 0;
Expand All @@ -29,15 +29,15 @@ class rust_thread_sched_launcher
:public rust_sched_launcher,
private rust_thread {
public:
rust_thread_sched_launcher(rust_scheduler *sched, int id);
rust_thread_sched_launcher(rust_scheduler *sched, int id, bool killed);
virtual void start() { rust_thread::start(); }
virtual void join() { rust_thread::join(); }
virtual void run() { driver.start_main_loop(); }
};

class rust_manual_sched_launcher : public rust_sched_launcher {
public:
rust_manual_sched_launcher(rust_scheduler *sched, int id);
rust_manual_sched_launcher(rust_scheduler *sched, int id, bool killed);
virtual void start() { }
virtual void join() { }
rust_sched_driver *get_driver() { return &driver; };
Expand All @@ -47,13 +47,14 @@ class rust_sched_launcher_factory {
public:
virtual ~rust_sched_launcher_factory() { }
virtual rust_sched_launcher *
create(rust_scheduler *sched, int id) = 0;
create(rust_scheduler *sched, int id, bool killed) = 0;
};

class rust_thread_sched_launcher_factory
: public rust_sched_launcher_factory {
public:
virtual rust_sched_launcher *create(rust_scheduler *sched, int id);
virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
bool killed);
};

class rust_manual_sched_launcher_factory
Expand All @@ -62,7 +63,8 @@ class rust_manual_sched_launcher_factory
rust_manual_sched_launcher *launcher;
public:
rust_manual_sched_launcher_factory() : launcher(NULL) { }
virtual rust_sched_launcher *create(rust_scheduler *sched, int id);
virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
bool killed);
rust_sched_driver *get_driver() {
assert(launcher != NULL);
return launcher->get_driver();
Expand Down
10 changes: 9 additions & 1 deletion src/rt/rust_sched_loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ const size_t C_STACK_SIZE = 1024*1024;

bool rust_sched_loop::tls_initialized = false;

rust_sched_loop::rust_sched_loop(rust_scheduler *sched,int id) :
rust_sched_loop::rust_sched_loop(rust_scheduler *sched, int id, bool killed) :
_log(this),
id(id),
should_exit(false),
cached_c_stack(NULL),
dead_task(NULL),
killed(killed),
pump_signal(NULL),
kernel(sched->kernel),
sched(sched),
Expand Down Expand Up @@ -63,6 +64,8 @@ rust_sched_loop::kill_all_tasks() {

{
scoped_lock with(lock);
// Any task created after this will be killed. See transition, below.
killed = true;

for (size_t i = 0; i < running_tasks.length(); i++) {
all_tasks.push_back(running_tasks[i]);
Expand Down Expand Up @@ -319,6 +322,11 @@ rust_sched_loop::transition(rust_task *task,
}
task->set_state(dst, cond, cond_name);

// If the entire runtime is failing, newborn tasks must be doomed.
if (src == task_state_newborn && killed) {
task->kill_inner();
}

pump_loop();
}

Expand Down
4 changes: 3 additions & 1 deletion src/rt/rust_sched_loop.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct rust_sched_loop
rust_task_list running_tasks;
rust_task_list blocked_tasks;
rust_task *dead_task;
bool killed;

rust_signal *pump_signal;

Expand Down Expand Up @@ -91,7 +92,7 @@ struct rust_sched_loop

// Only a pointer to 'name' is kept, so it must live as long as this
// domain.
rust_sched_loop(rust_scheduler *sched, int id);
rust_sched_loop(rust_scheduler *sched, int id, bool killed);
void activate(rust_task *task);
rust_log & get_log();
void fail();
Expand All @@ -107,6 +108,7 @@ struct rust_sched_loop
void log_state();

void kill_all_tasks();
bool doomed();

rust_task *create_task(rust_task *spawner, const char *name);

Expand Down
12 changes: 7 additions & 5 deletions src/rt/rust_scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
size_t num_threads,
rust_sched_id id,
bool allow_exit,
bool killed,
rust_sched_launcher_factory *launchfac) :
kernel(kernel),
live_threads(num_threads),
Expand All @@ -18,7 +19,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
num_threads(num_threads),
id(id)
{
create_task_threads(launchfac);
create_task_threads(launchfac, killed);
}

rust_scheduler::~rust_scheduler() {
Expand All @@ -27,8 +28,8 @@ rust_scheduler::~rust_scheduler() {

rust_sched_launcher *
rust_scheduler::create_task_thread(rust_sched_launcher_factory *launchfac,
int id) {
rust_sched_launcher *thread = launchfac->create(this, id);
int id, bool killed) {
rust_sched_launcher *thread = launchfac->create(this, id, killed);
KLOG(kernel, kern, "created task thread: " PTR ", id: %d",
thread, id);
return thread;
Expand All @@ -41,11 +42,12 @@ rust_scheduler::destroy_task_thread(rust_sched_launcher *thread) {
}

void
rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac) {
rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac,
bool killed) {
KLOG(kernel, kern, "Using %d scheduler threads.", num_threads);

for(size_t i = 0; i < num_threads; ++i) {
threads.push(create_task_thread(launchfac, i));
threads.push(create_task_thread(launchfac, i, killed));
}
}

Expand Down
8 changes: 5 additions & 3 deletions src/rt/rust_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,20 @@ class rust_scheduler : public kernel_owned<rust_scheduler> {

rust_sched_id id;

void create_task_threads(rust_sched_launcher_factory *launchfac);
void create_task_threads(rust_sched_launcher_factory *launchfac,
bool killed);
void destroy_task_threads();

rust_sched_launcher *
create_task_thread(rust_sched_launcher_factory *launchfac, int id);
create_task_thread(rust_sched_launcher_factory *launchfac, int id,
bool killed);
void destroy_task_thread(rust_sched_launcher *thread);

void exit();

public:
rust_scheduler(rust_kernel *kernel, size_t num_threads,
rust_sched_id id, bool allow_exit,
rust_sched_id id, bool allow_exit, bool killed,
rust_sched_launcher_factory *launchfac);
~rust_scheduler();

Expand Down
11 changes: 10 additions & 1 deletion src/rt/rust_task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,17 @@ rust_task::yield(bool *killed) {
void
rust_task::kill() {
scoped_lock with(lifecycle_lock);
kill_inner();
}

void rust_task::kill_inner() {
lifecycle_lock.must_have_lock();

// XXX: bblum: kill/kill race
// Multiple kills should be able to safely race, but check anyway.
if (killed) {
LOG(this, task, "task %s @0x%" PRIxPTR " already killed", name, this);
return;
}

// Note the distinction here: kill() is when you're in an upcall
// from task A and want to force-fail task B, you do B->kill().
Expand Down
1 change: 1 addition & 0 deletions src/rt/rust_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ rust_task : public kernel_owned<rust_task>

// Fail this task (assuming caller-on-stack is different task).
void kill();
void kill_inner();

// Indicates that we've been killed and now is an appropriate
// time to fail as a result
Expand Down

0 comments on commit 5bb4a12

Please sign in to comment.