Skip to content

Commit

Permalink
[reboot] User-friendly reboot cause message for kernel panic (#1486)
Browse files Browse the repository at this point in the history
Signed-off-by: Yong Zhao yozhao@microsoft.com

What I did
If a SONiC device rebooted because of a kernel panic, the CLI command `show reboot-cause` should report "Kernel Panic" as the cause.

How I did it
Currently, when the kernel panics, the device is rebooted and the reboot script writes a message into reboot-cause.txt. This change only updates the content of that message.

How to verify it
I verified this change on a virtual switch with the following steps:

Trigger a kernel panic: `echo c > /proc/sysrq-trigger`
After the device has rebooted, run the CLI command `show reboot-cause`:
admin@vlab-01:~$ show reboot-cause
Kernel Panic [Time: Tue 09 Mar 2021 03:03:56 AM UTC]
Previous command output (if the output of a command-line utility has changed)
admin@vlab-01:~$ show reboot-cause
User issued 'kdump' command [User: kdump, Time: Mon 08 Mar 2021 01:47:43 AM UTC]

New command output (if the output of a command-line utility has changed)
admin@vlab-01:~$ show reboot-cause
Kernel Panic [Time: Tue 09 Mar 2021 03:03:56 AM UTC]
  • Loading branch information
yozhao101 authored Mar 28, 2021
1 parent 1f1696a commit 4d89510
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 16 deletions.
2 changes: 1 addition & 1 deletion scripts/reboot
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ REBOOT_TIME=$(date)
VMCORE_FILE=/proc/vmcore
if [ -e $VMCORE_FILE -a -s $VMCORE_FILE ]; then
echo "We have a /proc/vmcore, then we just kdump'ed"
echo "User issued 'kdump' command [User: kdump, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
echo "Kernel Panic [Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
sync
PLATFORM=$(grep -oP 'sonic_platform=\K\S+' /proc/cmdline)
if [ ! -z "${PLATFORM}" -a -x ${DEVPATH}/${PLATFORM}/${PLAT_REBOOT} ]; then
Expand Down
54 changes: 40 additions & 14 deletions show/reboot_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,21 @@
import utilities_common.cli as clicommon


PREVIOUS_REBOOT_CAUSE_FILE = "/host/reboot-cause/previous-reboot-cause.json"
USER_ISSUED_REBOOT_CAUSE_REGEX ="User issued \'{}\' command [User: {}, Time: {}]"
PREVIOUS_REBOOT_CAUSE_FILE_PATH = "/host/reboot-cause/previous-reboot-cause.json"


def read_reboot_cause_file():
result = ""
if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
with open(PREVIOUS_REBOOT_CAUSE_FILE) as f:
result = json.load(f)
return result
reboot_cause_dict = {}

if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE_PATH):
with open(PREVIOUS_REBOOT_CAUSE_FILE_PATH) as prev_reboot_cause_file:
try:
reboot_cause_dict = json.load(prev_reboot_cause_file)
except json.JSONDecodeError as err:
click.echo("Failed to load JSON file '{}'!".format(PREVIOUS_REBOOT_CAUSE_FILE_PATH), err=True)

return reboot_cause_dict


#
# 'reboot-cause' group ("show reboot-cause")
Expand All @@ -26,15 +32,35 @@ def read_reboot_cause_file():
def reboot_cause(ctx):
"""Show cause of most recent reboot"""
if ctx.invoked_subcommand is None:
reboot_cause = ""
reboot_cause_str = ""

# Read the previous reboot cause
data = read_reboot_cause_file()
if data['user'] == "N/A":
reboot_cause = "{}".format(data['cause'])
reboot_cause_dict = read_reboot_cause_file()

reboot_cause = reboot_cause_dict.get("cause", "Unknown")
reboot_user = reboot_cause_dict.get("user", "N/A")
reboot_time = reboot_cause_dict.get("time", "N/A")

if reboot_user != "N/A":
reboot_cause_str = "User issued '{}' command".format(reboot_cause)
else:
reboot_cause = USER_ISSUED_REBOOT_CAUSE_REGEX.format(data['cause'], data['user'], data['time'])
reboot_cause_str = reboot_cause

if reboot_user != "N/A" or reboot_time != "N/A":
reboot_cause_str += " ["

if reboot_user != "N/A":
reboot_cause_str += "User: {}".format(reboot_user)
if reboot_time != "N/A":
reboot_cause_str += ", "

if reboot_time != "N/A":
reboot_cause_str += "Time: {}".format(reboot_time)

reboot_cause_str += "]"

click.echo(reboot_cause_str)

click.echo(reboot_cause)

# 'history' subcommand ("show reboot-cause history")
@reboot_cause.command()
Expand All @@ -54,7 +80,7 @@ def history():
for tk in table_keys:
entry = db.get_all(db.STATE_DB, tk)
r = []
r.append(tk.replace(prefix,""))
r.append(tk.replace(prefix, ""))
r.append(entry['cause'] if 'cause' in entry else "")
r.append(entry['time'] if 'time' in entry else "")
r.append(entry['user'] if 'user' in entry else "")
Expand Down
2 changes: 1 addition & 1 deletion tests/reboot_cause_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setup_class(cls):

# Test 'show reboot-cause' without previous-reboot-cause.json
def test_reboot_cause_no_history_file(self):
expected_output = ""
expected_output = "Unknown\n"
runner = CliRunner()
result = runner.invoke(show.cli.commands["reboot-cause"], [])
assert result.output == expected_output
Expand Down

0 comments on commit 4d89510

Please sign in to comment.