Skip to content

Commit

Permalink
Merge branch 'pg_kill'
Browse files Browse the repository at this point in the history
  • Loading branch information
gnovis committed Nov 22, 2016
2 parents 8da580a + 8c8ce8f commit e983d91
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 17 deletions.
35 changes: 20 additions & 15 deletions scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def get_args():

# main function
def run_task(gpu_info_file, args):

is_waiting = False

while True:
Expand Down Expand Up @@ -111,15 +110,16 @@ def run_task(gpu_info_file, args):
p = subprocess.Popen(task,
stdout=args.out,
stderr=args.err,
preexec_fn=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
preexec_fn=before_new_subprocess)

# The second Ctrl-C kills the subprocess
signal.signal(signal.SIGINT, lambda signum, frame: stop_subprocess(p, gpu_info_file, free_gpu))

set_additional_info(gpu_info_file, free_gpu, os.getlogin(), task,
p.pid, get_formated_dt(dt_before), cuda)

print("SCH PID: {}\nTASK PID: {}\nGPU: {}".format(os.getpid(), p.pid, cuda))
print("GPU: {}\nSCH PID: {}\nTASK PID: {}".format(cuda, os.getpid(), p.pid))
print("SCH PGID: {}\nTASK PGID: {}".format(os.getpgid(os.getpid()), os.getpgid(p.pid)))
p.wait()

dt_after = datetime.datetime.now()
Expand All @@ -146,6 +146,11 @@ def run_task(gpu_info_file, args):
handle_io_error(e)


def before_new_subprocess():
    """Prepare the child process right before the task is executed.

    Passed as ``preexec_fn`` to ``subprocess.Popen``, so it runs in the
    child only. It does two things, in this order:

    1. Ignores SIGINT in the child, so that Ctrl-C is handled by the
       scheduler's own SIGINT handler instead of hitting the task directly.
    2. Calls ``os.setsid()`` to put the child into a new session, which
       gives it its own process group. The scheduler later signals the
       task through ``os.killpg`` on that group.
    """
    # Ctrl-C must not reach the task directly.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Detach into a new session / process group so the task can be
    # signalled as a group.
    os.setsid()


def prepare_args(args):
result = []
for a in args.split('\n'):
Expand All @@ -156,11 +161,11 @@ def prepare_args(args):

def stop_subprocess(process, gpu_file, gpu_to_release):
"""
This function takes care of the Ctrl-C signal.
This function takes care of the Ctrl-C (SIGINT) signal.
On the first Ctrl-C the warning is printed.
On the second Ctrl-C the task is terminated.
On the third Ctrl-C the task is killed.
Delay between terminate and kill is 5 seconds.
Delay between terminate and kill is specified in KILL_DELAY_SEC.
"""
def allow_kill_task():
global TASK_SIGNAL
Expand All @@ -175,24 +180,24 @@ def check_process_liveness(process, max_time):
global TASK_SIGNAL

if TASK_SIGNAL is KILL:
print("\nThe task (PID: {}) was killed.".format(process.pid))
pgid = os.getpgid(process.pid)
print("\nThe task (PGID: {}) was killed.".format(pgid))
set_free_gpu(gpu_file, gpu_to_release)
pgrp = os.getpgid(process.pid)
os.killpg(pgrp, signal.SIGKILL)
os.killpg(pgid, signal.SIGKILL)
TASK_SIGNAL = None

# reached on the second Ctrl-C: the subprocess runs in its own session
# (os.setsid() in before_new_subprocess), so SIGTERM is sent to its whole process group
elif TASK_SIGNAL is TERMINATE:
print("\nThe task (PID: {}) was terminated.".format(process.pid))
pgid = os.getpgid(process.pid)
print("\nThe task (PGID: {}) was terminated.".format(pgid))
set_free_gpu(gpu_file, gpu_to_release)
pgrp = os.getpgid(process.pid)
os.killpg(pgrp, signal.SIGTERM)
os.killpg(pgid, signal.SIGTERM)
check_process_liveness(process, KILL_DELAY_SEC)
TASK_SIGNAL = None

elif TASK_SIGNAL is WARN:
print("\nNext Ctrl-C kill the task (PID: {}).".format(process.pid))
TASK_SIGNAL = KILL
pgid = os.getpgid(process.pid)
print("\nNext Ctrl-C terminate the task (PGID: {}).".format(pgid))
TASK_SIGNAL = TERMINATE


def check_forced_free(gpu_indices, forced):
Expand Down
4 changes: 2 additions & 2 deletions tests/run_tests
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/sh

#../scheduler.py "./pgtest"
../scheduler.py "./test1.py `cat ./setup`" -v -fg 2
../scheduler.py "./pgtest" -v
#../scheduler.py "./test1.py -foo bar" -v -fg 2
#../scheduler.py "./test2.py -f 1 -opt 1 --port __num__"

0 comments on commit e983d91

Please sign in to comment.