Skip to content

Commit

Permalink
Merge pull request #1385 from grondo/issue#1379
Browse files Browse the repository at this point in the history
flux-wreck cancel: fall back to kill -9 if job is not pending
  • Loading branch information
garlick authored Mar 24, 2018
2 parents 4223879 + 69c228b commit 7fd643e
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
17 changes: 13 additions & 4 deletions src/cmd/flux-wreck
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,24 @@ end
prog:SubCommand {
name = "cancel",
description = "Cancel a pending job",
usage = "JOBID",
usage = "[-f|--force] JOBID",
options = {
{ name = "force", char = "f",
usage = "Force cancel even if scheduler is not loaded"
},
},
handler = function (self, arg)
local id = check_jobid_arg (self, arg[1])
local resp, err = f:rpc ("sched.cancel", { jobid = tonumber (id) })
if not resp then
if err == "Function not implemented" then
if err == "Function not implemented" and not self.opt.f then
prog:die ("job cancel not supported when scheduler not loaded")
else
prog:die ("Unable to cancel %d: %s\n", id, err)
elseif self.opt.f or err == "Invalid argument" then
prog:log ("Sending SIGKILL to %d\n", id)
local rc, err = f:sendevent ({signal = 9}, "wreck.%d.kill", id)
if not rc then self:die ("signal: %s\n", err) end
else
prog:die ("Unable to cancel %d: %s\n", id, err)
end
end
end
Expand Down
14 changes: 14 additions & 0 deletions t/t2000-wreck.t
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,20 @@ test_expect_success NO_SCHED 'flux-wreck cancel: fails when sched not loaded' '
EOF
test_cmp expected.cancel err.cancel
'
# Verify that `flux wreck cancel -f` ends a job that is already running.
#
# Steps:
#   1. Launch a detached `sleep 100` job with `flux wreckrun --detach`.
#   2. Wait (via kvs-watch-until.lua) until the job's `state` key reads "running".
#   3. Run `flux wreck cancel -f` on that job id.
#   4. Wait until the job's `state` key reads "complete".
#   5. Require `flux wreck status` to exit with code 137 and to report that
#      task0 exited with signal 9 (137 = 128 + 9).
test_expect_success 'flux-wreck cancel: falls back to SIGKILL with -f' '
run_timeout 1 flux wreckrun --detach sleep 100 &&
id=$(last_job_id) &&
LWJ=$(last_job_path) &&
${SHARNESS_TEST_SRCDIR}/scripts/kvs-watch-until.lua -vt 1 $LWJ.state "v == \"running\"" &&
flux wreck cancel -f $id &&
${SHARNESS_TEST_SRCDIR}/scripts/kvs-watch-until.lua -vt 1 $LWJ.state "v == \"complete\"" &&
test_expect_code 137 flux wreck status $id >output.cancel-f &&
cat >expected.cancel-f <<-EOF &&
Job $id status: complete
task0: exited with signal 9
EOF
test_cmp expected.cancel-f output.cancel-f
'

check_complete_link() {
for i in `seq 0 5`; do
Expand Down

0 comments on commit 7fd643e

Please sign in to comment.