Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Nvidia] Fix issue: watchdogutil command does not work #186

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 24 additions & 25 deletions platform/mellanox/mlnx-platform-api/sonic_platform/watchdog.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import time

from sonic_platform_base.watchdog_base import WatchdogBase
from . import utils

""" ioctl constants """
IO_WRITE = 0x40000000
Expand Down Expand Up @@ -80,15 +81,17 @@ def __init__(self, wd_device_path):
super(WatchdogImplBase, self).__init__()

self.watchdog_path = wd_device_path
self.watchdog = self.open_handle()
self._watchdog = None
self.timeout = self._gettimeout()

# Opening a watchdog descriptor starts
# watchdog timer;
# by default it should be stopped
self._disablecard()
self.armed = False
@property
def watchdog(self):
if self._watchdog is None:
self._watchdog = self.open_handle()
return self._watchdog

self.timeout = self._gettimeout()
def open_handle(self):
return os.open(self.watchdog_path, os.O_WRONLY)

def open_handle(self):
return os.open(self.watchdog_path, os.O_WRONLY)
Expand Down Expand Up @@ -134,21 +137,15 @@ def _gettimeout(self):
@return watchdog timeout
"""

req = array.array('I', [0])
fcntl.ioctl(self.watchdog, WDIOC_GETTIMEOUT, req, True)

return int(req[0])
return utils.read_int_from_file('/run/hw-management/watchdog/main/timeout')

def _gettimeleft(self):
"""
Get time left before watchdog timer expires
@return time left in seconds
"""

req = array.array('I', [0])
fcntl.ioctl(self.watchdog, WDIOC_GETTIMELEFT, req, True)

return int(req[0])
return utils.read_int_from_file('/run/hw-management/watchdog/main/timeleft')

def arm(self, seconds):
"""
Expand All @@ -162,11 +159,10 @@ def arm(self, seconds):
try:
if self.timeout != seconds:
self.timeout = self._settimeout(seconds)
if self.armed:
if self.is_armed():
self._keepalive()
else:
self._enablecard()
self.armed = True
ret = self.timeout
except IOError:
pass
Expand All @@ -179,10 +175,9 @@ def disarm(self):
"""

disarmed = False
if self.armed:
if self.is_armed():
try:
self._disablecard()
self.armed = False
disarmed = True
except IOError:
pass
Expand All @@ -194,7 +189,7 @@ def is_armed(self):
Implements is_armed WatchdogBase API
"""

return self.armed
return utils.read_str_from_file('/run/hw-management/watchdog/main/state') == 'active'

def get_remaining_time(self):
"""
Expand All @@ -203,7 +198,7 @@ def get_remaining_time(self):

timeleft = WD_COMMON_ERROR

if self.armed:
if self.is_armed():
try:
timeleft = self._gettimeleft()
except IOError:
Expand All @@ -216,13 +211,15 @@ def __del__(self):
Close watchdog
"""

os.close(self.watchdog)
if self._watchdog is not None:
os.close(self._watchdog)


class WatchdogType1(WatchdogImplBase):
"""
Watchdog type 1
"""
TIMESTAMP_FILE = '/tmp/nvidia/watchdog_timestamp'

def arm(self, seconds):
"""
Expand All @@ -233,7 +230,8 @@ def arm(self, seconds):
ret = WatchdogImplBase.arm(self, seconds)
# Save the watchdog arm timestamp
# requiered for get_remaining_time()
self.arm_timestamp = time.time()
os.makedirs('/tmp/nvidia', exist_ok=True)
utils.write_file(self.TIMESTAMP_FILE, str(time.time()))

return ret

Expand All @@ -246,8 +244,9 @@ def get_remaining_time(self):

timeleft = WD_COMMON_ERROR

if self.armed:
timeleft = int(self.timeout - (time.time() - self.arm_timestamp))
if self.is_armed():
arm_timestamp = utils.read_float_from_file(self.TIMESTAMP_FILE)
timeleft = int(self.timeout - (time.time() - arm_timestamp))

return timeleft

Expand Down