-
Notifications
You must be signed in to change notification settings - Fork 301
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #232 from eugeneia/watchdog
Added watchdog module.
- Loading branch information
Showing
6 changed files
with
214 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
The `lib.watchdog.watchdog` module implements per-thread watchdog | ||
functionality. Its purpose is to watch and kill processes which fail to | ||
call the watchdog periodically (e.g. because they hang). | ||
|
||
It does so by using `alarm(3)` and `ualarm(3)` to have the OS send a | ||
`SIGALRM` to the process after a specified timeout. Because the process | ||
does not handle the signal it will be killed and exit with status `142`. | ||
|
||
Usage is as follows: | ||
|
||
-- Use the watchdog module. | ||
watchdog = require("lib.watchdog.watchdog") | ||
|
||
`set(n)` sets the watchdog timeout to `n` milliseconds. Because | ||
`alarm(3)` is used for timeouts longer than one second, values for `n` | ||
greater than 1000 (i.e. one second) will be rounded up to the next second | ||
(e.g. `set(1100)` <=> `set(2000)`). | ||
|
||
-- Set the timeout to 500ms. | ||
watchdog.set(500) | ||
|
||
`reset()` will reset the alarm (or start it if it has not been started | ||
before). Thus now you have 500 milliseconds to reset before the process | ||
will be killed. | ||
|
||
-- Start or reset the timeout. | ||
watchdog.reset() | ||
|
||
Alternatively you can use `stop()` to disable the timeout and prevent the | ||
process from being killed. | ||
|
||
-- Disable the timeout. | ||
watchdog.stop() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/sh

# Run a single selftest_design test case ($1) and abort the whole script
# with status 1 unless it exits with the expected status ($2).
expect_status () {
    ./snabb ./lib/watchdog/selftest_design "$1"
    [ $? = "$2" ] || exit 1
}

# Cases that reset or stop the watchdog must exit with status 0; the
# *_timeout cases must exit with status 142.
expect_status alert 0
expect_status alert_stop 0
expect_status alert_timeout 142
expect_status ualert 0
expect_status ualert_stop 0
expect_status ualert_timeout 142
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/usr/bin/env snabb | ||
|
||
local ffi = require("ffi") | ||
local C = ffi.C | ||
local watchdog = require("lib.watchdog.watchdog") | ||
|
||
-- Test cases; run() looks them up in this table by name.
test = {}

-- Watchdog with a 400ms timeout that is reset in time: sleep 300ms,
-- reset, sleep another 300ms and return normally.
function test.ualert ()
   local nap = 300000 -- usleep argument, in microseconds

   watchdog.set(400)
   watchdog.reset()
   print("Set timeout, now sleeping...")

   C.usleep(nap)
   print("Resetting watchdog.")
   watchdog.reset()

   C.usleep(nap)
   print("Exit normally.")
end
|
||
-- Watchdog with a 400ms timeout that is stopped before it expires:
-- sleep 300ms, stop the watchdog, sleep another 300ms and return
-- normally.
function test.ualert_stop ()
   local nap = 300000 -- usleep argument, in microseconds

   watchdog.set(400)
   watchdog.reset()
   print("Set timeout, now sleeping...")

   C.usleep(nap)
   print("Stopping watchdog.")
   watchdog.stop()

   C.usleep(nap)
   print("Exit normally.")
end
|
||
-- Watchdog with a 400ms timeout that is never reset. The process is
-- expected to be killed during the 800ms sleep, so the final print is
-- only reached if the watchdog failed to fire.
test["ualert_timeout"] = function ()
   watchdog.set(400)
   watchdog.reset()
   print("Set timeout, now sleeping until watchdog times out.")

   C.usleep(800000)
   -- The watchdog is built on alarm(3)/ualarm(3), which deliver SIGALRM
   -- (not SIGABRT).
   print("Error: SIGALRM not received.")
end
|
||
-- Watchdog with a 2000ms timeout that is reset in time: sleep one
-- second, reset, sleep another second and return normally.
function test.alert ()
   local nap = 1 -- sleep argument, in seconds

   watchdog.set(2000)
   watchdog.reset()
   print("Set timeout, now sleeping...")

   C.sleep(nap)
   print("Resetting watchdog.")
   watchdog.reset()

   C.sleep(nap)
   print("Exit normally.")
end
|
||
-- Watchdog with a 2000ms timeout that is stopped before it expires:
-- sleep one second, stop the watchdog, sleep another second and return
-- normally.
function test.alert_stop ()
   local nap = 1 -- sleep argument, in seconds

   watchdog.set(2000)
   watchdog.reset()
   print("Set timeout, now sleeping...")

   C.sleep(nap)
   print("Stopping watchdog.")
   watchdog.stop()

   C.sleep(nap)
   print("Exit normally.")
end
|
||
-- Watchdog with a 2000ms timeout that is never reset. The process is
-- expected to be killed during the sleep, so the final print is only
-- reached if the watchdog failed to fire.
test["alert_timeout"] = function ()
   watchdog.set(2000)
   watchdog.reset()
   print("Set timeout, now sleeping until watchdog times out.")

   -- sleep() takes seconds (unlike usleep()). Twice the timeout is
   -- enough to detect a watchdog that did not fire, and keeps a failing
   -- run from blocking for days.
   C.sleep(4)
   -- The watchdog is built on alarm(3)/ualarm(3), which deliver SIGALRM
   -- (not SIGABRT).
   print("Error: SIGALRM not received.")
end
|
||
-- Run the test case registered under the name `testcase` in `test`.
-- Raises an error naming the offending argument if no such test case
-- exists, instead of failing with "attempt to call a nil value".
function run (testcase)
   local case = test[testcase]
   if type(case) ~= "function" then
      error("Unknown test case: "..tostring(testcase))
   end
   print("[testing "..testcase.."]")
   case()
end
|
||
run(unpack(main.parameters)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
module(...,package.seeall) | ||
|
||
ffi = require("ffi") | ||
C = ffi.C | ||
|
||
-- Watchdog timeout in unit defined by `precision' (just below). | ||
timeout = nil | ||
|
||
-- Watchdog precision. | ||
precision = nil | ||
|
||
-- Set watchdog timeout to mseconds (milliseconds). Does NOT start the
-- watchdog. Timeouts of one second or more are stored in whole seconds,
-- rounded up to the next second, e.g. set(1100) <=> set(2000). Shorter
-- timeouts are stored in microseconds.
function set (mseconds)
   -- ualarm(3) may reject values of 1000000 microseconds or more with
   -- EINVAL, so exactly 1000ms is stored with second precision as well.
   if mseconds >= 1000 then
      timeout = math.ceil(mseconds / 1000)
      precision = "second"
   else
      timeout = mseconds * 1000
      precision = "microsecond"
   end
end
|
||
-- Arm the watchdog with the stored timeout, or re-arm it if it is
-- already running. Raises an error if no precision has been configured.
function reset ()
   if precision == "microsecond" then
      C.ualarm(timeout, 0)
      return
   end
   if precision == "second" then
      C.alarm(timeout)
      return
   end
   error("Watchdog was not set.")
end
|
||
-- Cancel a pending watchdog alarm by setting the timer to zero. Does
-- nothing if no precision has been configured yet.
function stop ()
   if precision == "microsecond" then
      C.ualarm(0, 0)
   elseif precision == "second" then
      C.alarm(0)
   end
end