Skip to content

Commit

Permalink
Add self timeout and crash if exceeded. (sonic-net#1502)
Browse files Browse the repository at this point in the history
Log callstack on timeout.

What I did
Add self timeout and crash on timeout.
Before crashing, log the error and the call stack.

How I did it
Add a signal-based alarm and a handler that logs the error and the call stack.

How to verify it
Artificially introduce a sleep longer than TIMEOUT_SECONDS (2 minutes) in any function of the script that is on the call path.
Invoke the script. Watch it crash and note the error & stack logged in syslog.
  • Loading branch information
renukamanavalan authored Mar 17, 2021
1 parent aa71231 commit 19d4042
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
19 changes: 18 additions & 1 deletion scripts/route_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
import sys
import syslog
import time
import signal
import traceback

from swsscommon import swsscommon

Expand All @@ -53,6 +55,9 @@

SUBSCRIBE_WAIT_SECS = 1

# Maximum time, in seconds, allowed for a single check_routes() pass (2 minutes)
TIMEOUT_SECONDS = 120

UNIT_TESTING = 0

os.environ['PYTHONUNBUFFERED']='True'
Expand All @@ -75,6 +80,14 @@ def __str__(self):
report_level = syslog.LOG_ERR
write_to_syslog = False

def handler(signum, frame):
    """
    Signal handler invoked when the self-imposed timeout alarm fires.

    main() registers this function for SIGALRM and arms the alarm for
    TIMEOUT_SECONDS around each check_routes() call. When it runs, it
    reports the timeout and the current call stack at LOG_ERR level via
    print_message, then raises to abort the script.

    :param signum: number of the signal being delivered (unused)
    :param frame: stack frame that was interrupted (unused)
    :raises Exception: always, with the message "timeout occurred"
    """
    abort_msg = "Aborting routeCheck.py upon timeout signal after {} seconds".format(
        TIMEOUT_SECONDS)
    print_message(syslog.LOG_ERR, abort_msg)

    # Capture the stack as seen from inside the handler so the log shows
    # where the script was stuck when the alarm went off.
    call_stack = traceback.extract_stack()
    print_message(syslog.LOG_ERR, str(call_stack))

    raise Exception("timeout occurred")


def set_level(lvl, log_to_syslog):
"""
Sets the log level
Expand Down Expand Up @@ -429,7 +442,7 @@ def main():
parser=argparse.ArgumentParser(description="Verify routes between APPL-DB & ASIC-DB are in sync")
parser.add_argument('-m', "--mode", type=Level, choices=list(Level), default='ERR')
parser.add_argument("-i", "--interval", type=int, default=0, help="Scan interval in seconds")
parser.add_argument("-s", "--log_to_syslog", action="store_true", default=False, help="Write message to syslog")
parser.add_argument("-s", "--log_to_syslog", action="store_true", default=True, help="Write message to syslog")
args = parser.parse_args()

set_level(args.mode, args.log_to_syslog)
Expand All @@ -444,8 +457,12 @@ def main():
if UNIT_TESTING:
interval = 1

signal.signal(signal.SIGALRM, handler)

while True:
signal.alarm(TIMEOUT_SECONDS)
ret, res= check_routes()
signal.alarm(0)

if interval:
time.sleep(interval)
Expand Down
20 changes: 20 additions & 0 deletions tests/route_check_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import os
import sys
import time
from unittest.mock import MagicMock, patch

import pytest
Expand Down Expand Up @@ -276,6 +277,7 @@ def table_side_effect(db, tbl):

class mock_selector:
TIMEOUT = 1
EMULATE_HANG = False

def __init__(self):
self.select_state = 0
Expand All @@ -295,6 +297,9 @@ def select(self, timeout):
state = self.select_state
self.subs.update()

if mock_selector.EMULATE_HANG:
time.sleep(60)

if self.select_state == 0:
self.select_state = self.TIMEOUT
else:
Expand Down Expand Up @@ -423,6 +428,21 @@ def test_server(self, mock_subs, mock_sel, mock_table, mock_conn):
assert res == expect_res


# Test timeout
route_check.TIMEOUT_SECONDS = 5
mock_selector.EMULATE_HANG = True
ex_raised = False

try:
ret, res = route_check.main()
except Exception as err:
ex_raised = True
expect = "timeout occurred"
ex_str = str(err)
assert ex_str == expect, "{} != {}".format(ex_str, expect)
assert ex_raised, "Exception expected"





Expand Down

0 comments on commit 19d4042

Please sign in to comment.