Skip to content

Commit

Permalink
Add dynamic sensor logic for fixed and psu presence/state checking in thermalctld (#401)
Browse files Browse the repository at this point in the history

* add modular sensor logic even for fixed devices and presence/status checking for PSUs

* test changes

* fixing accidental removal of name

* logic correction

* isolating key error

* psu runtime change

* fixing whitespace addition

* remove powergood check from thermalctld logic
  • Loading branch information
gregoryboudreau authored Nov 28, 2023
1 parent 55a6828 commit e2d9f87
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 22 deletions.
41 changes: 22 additions & 19 deletions sonic-thermalctld/scripts/thermalctld
Original file line number Diff line number Diff line change
Expand Up @@ -520,10 +520,10 @@ class TemperatureUpdater(logger.Logger):
self.table = swsscommon.Table(state_db, TemperatureUpdater.TEMPER_INFO_TABLE_NAME)
self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE)
self.chassis_table = None
self.all_thermals = set()

self.is_chassis_system = chassis.is_modular_chassis()
if self.is_chassis_system:
self.module_thermals = set()
my_slot = try_get(chassis.get_my_slot, INVALID_SLOT)
if my_slot != INVALID_SLOT:
try:
Expand Down Expand Up @@ -566,30 +566,34 @@ class TemperatureUpdater(logger.Logger):
:return:
"""
self.log_debug("Start temperature updating")
available_thermals = set()
for index, thermal in enumerate(self.chassis.get_all_thermals()):
if self.task_stopping_event.is_set():
return

available_thermals.add((thermal, CHASSIS_INFO_KEY, index))
self._refresh_temperature_status(CHASSIS_INFO_KEY, thermal, index)

for psu_index, psu in enumerate(self.chassis.get_all_psus()):
parent_name = 'PSU {}'.format(psu_index + 1)
for thermal_index, thermal in enumerate(psu.get_all_thermals()):
if self.task_stopping_event.is_set():
return
if psu.get_presence():
for thermal_index, thermal in enumerate(psu.get_all_thermals()):
if self.task_stopping_event.is_set():
return

self._refresh_temperature_status(parent_name, thermal, thermal_index)
available_thermals.add((thermal, parent_name, thermal_index))
self._refresh_temperature_status(parent_name, thermal, thermal_index)

for sfp_index, sfp in enumerate(self.chassis.get_all_sfps()):
parent_name = 'SFP {}'.format(sfp_index + 1)
for thermal_index, thermal in enumerate(sfp.get_all_thermals()):
if self.task_stopping_event.is_set():
return

available_thermals.add((thermal, parent_name, thermal_index))
self._refresh_temperature_status(parent_name, thermal, thermal_index)

if self.is_chassis_system:
available_thermals = set()
for module_index, module in enumerate(self.chassis.get_all_modules()):
module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1))

Expand All @@ -610,19 +614,18 @@ class TemperatureUpdater(logger.Logger):
self._refresh_temperature_status(sfp_name, thermal, thermal_index)

for psu_index, psu in enumerate(module.get_all_psus()):
psu_name = '{} PSU {}'.format(module_name, psu_index + 1)
for thermal_index, thermal in enumerate(psu.get_all_thermals()):
if self.task_stopping_event.is_set():
return

available_thermals.add((thermal, psu_name, thermal_index))
self._refresh_temperature_status(psu_name, thermal, thermal_index)


thermals_to_remove = self.module_thermals - available_thermals
self.module_thermals = available_thermals
for thermal, parent_name, thermal_index in thermals_to_remove:
self._remove_thermal_from_db(thermal, parent_name, thermal_index)
if psu.get_presence():
psu_name = '{} PSU {}'.format(module_name, psu_index + 1)
for thermal_index, thermal in enumerate(psu.get_all_thermals()):
if self.task_stopping_event.is_set():
return
available_thermals.add((thermal, psu_name, thermal_index))
self._refresh_temperature_status(psu_name, thermal, thermal_index)

thermals_to_remove = self.all_thermals - available_thermals
self.all_thermals = available_thermals
for thermal, parent_name, thermal_index in thermals_to_remove:
self._remove_thermal_from_db(thermal, parent_name, thermal_index)

self.log_debug("End temperature updating")

Expand Down
3 changes: 3 additions & 0 deletions sonic-thermalctld/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ def get_serial(self):

def get_status(self):
return self._status

def get_powergood_status(self):
return self._status

def set_status(self, status):
self._status = status
Expand Down
6 changes: 3 additions & 3 deletions sonic-thermalctld/tests/test_thermalctld.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,11 +501,11 @@ def test_update_module_thermals(self):
chassis.set_modular_chassis(True)
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
temperature_updater.update()
assert len(temperature_updater.module_thermals) == 3
assert len(temperature_updater.all_thermals) == 3

chassis._module_list = []
temperature_updater.update()
assert len(temperature_updater.module_thermals) == 0
assert len(temperature_updater.all_thermals) == 0


# Modular chassis-related tests
Expand Down

0 comments on commit e2d9f87

Please sign in to comment.