diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/watchdog.py b/platform/mellanox/mlnx-platform-api/sonic_platform/watchdog.py index 2a032d3131f2..b1088eb46f0f 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/watchdog.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/watchdog.py @@ -27,6 +27,7 @@ import time from sonic_platform_base.watchdog_base import WatchdogBase +from . import utils """ ioctl constants """ IO_WRITE = 0x40000000 @@ -80,15 +81,17 @@ def __init__(self, wd_device_path): super(WatchdogImplBase, self).__init__() self.watchdog_path = wd_device_path - self.watchdog = self.open_handle() + self._watchdog = None + self.timeout = self._gettimeout() - # Opening a watchdog descriptor starts - # watchdog timer; - # by default it should be stopped - self._disablecard() - self.armed = False + @property + def watchdog(self): + if self._watchdog is None: + self._watchdog = self.open_handle() + return self._watchdog - self.timeout = self._gettimeout() + def open_handle(self): + return os.open(self.watchdog_path, os.O_WRONLY) def open_handle(self): return os.open(self.watchdog_path, os.O_WRONLY) @@ -134,10 +137,7 @@ def _gettimeout(self): @return watchdog timeout """ - req = array.array('I', [0]) - fcntl.ioctl(self.watchdog, WDIOC_GETTIMEOUT, req, True) - - return int(req[0]) + return utils.read_int_from_file('/run/hw-management/watchdog/main/timeout') def _gettimeleft(self): """ @@ -145,10 +145,7 @@ def _gettimeleft(self): @return time left in seconds """ - req = array.array('I', [0]) - fcntl.ioctl(self.watchdog, WDIOC_GETTIMELEFT, req, True) - - return int(req[0]) + return utils.read_int_from_file('/run/hw-management/watchdog/main/timeleft') def arm(self, seconds): """ @@ -162,11 +159,10 @@ def arm(self, seconds): try: if self.timeout != seconds: self.timeout = self._settimeout(seconds) - if self.armed: + if self.is_armed(): self._keepalive() else: self._enablecard() - self.armed = True ret = self.timeout except IOError: pass @@ -179,10 +175,9 @@ def disarm(self): """ disarmed = False - if self.armed: + if self.is_armed(): try: self._disablecard() - self.armed = False disarmed = True except IOError: pass @@ -194,7 +189,7 @@ def is_armed(self): Implements is_armed WatchdogBase API """ - return self.armed + return utils.read_str_from_file('/run/hw-management/watchdog/main/state') == 'active' def get_remaining_time(self): """ @@ -203,7 +198,7 @@ def get_remaining_time(self): timeleft = WD_COMMON_ERROR - if self.armed: + if self.is_armed(): try: timeleft = self._gettimeleft() except IOError: @@ -216,13 +211,15 @@ def __del__(self): Close watchdog """ - os.close(self.watchdog) + if self._watchdog is not None: + os.close(self._watchdog) class WatchdogType1(WatchdogImplBase): """ Watchdog type 1 """ + TIMESTAMP_FILE = '/tmp/nvidia/watchdog_timestamp' def arm(self, seconds): """ @@ -233,7 +230,8 @@ def arm(self, seconds): ret = WatchdogImplBase.arm(self, seconds) # Save the watchdog arm timestamp # requiered for get_remaining_time() - self.arm_timestamp = time.time() + os.makedirs('/tmp/nvidia', exist_ok=True) + utils.write_file(self.TIMESTAMP_FILE, str(time.time())) return ret @@ -246,8 +244,9 @@ def get_remaining_time(self): timeleft = WD_COMMON_ERROR - if self.armed: - timeleft = int(self.timeout - (time.time() - self.arm_timestamp)) + if self.is_armed(): + arm_timestamp = utils.read_float_from_file(self.TIMESTAMP_FILE) + timeleft = int(self.timeout - (time.time() - arm_timestamp)) return timeleft