From e72b28508a5f6cc59c10cc9fd35cd0fe4d063a8e Mon Sep 17 00:00:00 2001 From: Wirut Getbamrung Date: Wed, 2 Dec 2020 14:57:48 +0700 Subject: [PATCH 1/5] [platform/cel]: update dx010 fsc --- .../debian/platform-modules-dx010.install | 1 - .../debian/platform-modules-dx010.postinst | 1 + .../dx010/scripts/fancontrol.sh | 83 ++++++------------- .../dx010/scripts/thermal_overload_control.sh | 75 ----------------- .../services/fancontrol/fancontrol | 52 +----------- 5 files changed, 28 insertions(+), 184 deletions(-) delete mode 100755 platform/broadcom/sonic-platform-modules-cel/dx010/scripts/thermal_overload_control.sh diff --git a/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.install b/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.install index d7720cea90f4..a36e2cd1377c 100644 --- a/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.install +++ b/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.install @@ -3,7 +3,6 @@ dx010/cfg/dx010-modules.conf etc/modules-load.d dx010/systemd/platform-modules-dx010.service lib/systemd/system dx010/scripts/fancontrol.sh etc/init.d dx010/scripts/fancontrol.service lib/systemd/system -dx010/scripts/thermal_overload_control.sh usr/local/bin services/fancontrol/fancontrol usr/local/bin dx010/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-cel_seastone-r0 services/platform_api/platform_api_mgnt.sh usr/local/bin diff --git a/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.postinst b/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.postinst index 8dbf0ece6676..b198584282db 100644 --- a/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.postinst +++ b/platform/broadcom/sonic-platform-modules-cel/debian/platform-modules-dx010.postinst @@ -6,4 +6,5 @@ systemctl start platform-modules-dx010.service systemctl start fancontrol.service /usr/local/bin/platform_api_mgnt.sh install +/etc/init.d/fancontrol.sh install diff --git a/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/fancontrol.sh b/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/fancontrol.sh index 75ad6c65b37f..21274ed6e27d 100644 --- a/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/fancontrol.sh +++ b/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/fancontrol.sh @@ -7,75 +7,42 @@ # Default-Start: 2 3 4 5 # Default-Stop: # Short-Description: fancontrol -# Description: fan speed regulator +# Description: fancontrol configuration selector ### END INIT INFO . /lib/lsb/init-functions [ -f /etc/default/rcS ] && . /etc/default/rcS -PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin -DAEMON=/usr/local/bin/fancontrol -DESC="fan speed regulator" -NAME="fancontrol" -PIDFILE=/var/run/fancontrol.pid MAIN_CONF=/usr/share/sonic/device/x86_64-cel_seastone-r0/fancontrol -DEVPATH=/sys/devices/pci0000:00/0000:00:13.0/i2c-*/i2c-13/13-002e GPIO_DIR=/sys/class/gpio -BASE_GPIO=$(find $GPIO_DIR | grep gpiochip | grep -o '[[:digit:]]*') -DIRGPIO_START=15 -test -x $DAEMON || exit 0 +init() { + DIRGPIO_START=15 + BASE_GPIO=$(find $GPIO_DIR | grep gpiochip | grep -o '[[:digit:]]*') + FANDIR_GPIO_NUMBER=$((DIRGPIO_START + BASE_GPIO)) + FANDIR_VALUE=$(cat ${GPIO_DIR}/gpio${FANDIR_GPIO_NUMBER}/value) + DIRGPIO_START=$((DIRGPIO_START + 1)) + FANDIR=$([ $FANDIR_VALUE = 1 ] && echo "B2F" || echo "F2B") + CONF=${MAIN_CONF}-${FANDIR} + echo $FANDIR > /usr/share/sonic/device/x86_64-cel_seastone-r0/fan_airflow +} -for i in 1 2 3 4 5 -do - FANFAULT=$(cat ${DEVPATH}/fan${i}_fault) - [ $FANFAULT = 1 ] && continue - FANDIR_GPIO_NUMBER=$((DIRGPIO_START + BASE_GPIO)) - FANDIR_VALUE=$(cat ${GPIO_DIR}/gpio${FANDIR_GPIO_NUMBER}/value) - DIRGPIO_START=$((DIRGPIO_START+1)) - FANDIR=$([ $FANDIR_VALUE = 1 ] && echo "B2F" || echo "F2B") -done -CONF=${MAIN_CONF}-${FANDIR} +install() { + find /var/lib/docker/overlay*/ -path */sbin/fancontrol -exec cp /usr/local/bin/fancontrol {} \; +} case "$1" in - start) - if [ -f $CONF ] ; then - if $DAEMON --check $CONF 1>/dev/null 2>/dev/null ; then - log_daemon_msg "Starting $DESC" "$NAME\n" - start-stop-daemon --start --quiet --pidfile $PIDFILE --startas $DAEMON $CONF - log_end_msg $? - else - log_failure_msg "Not starting fancontrol, broken configuration file; please re-run pwmconfig." - fi - else - if [ "$VERBOSE" != no ]; then - log_warning_msg "Not starting fancontrol; run pwmconfig first." - fi - fi - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - start-stop-daemon --stop --quiet --pidfile $PIDFILE --oknodo --startas $DAEMON $CONF - rm -f $PIDFILE - log_end_msg $? - ;; - restart) - $0 stop - sleep 3 - $0 start - ;; - force-reload) - if start-stop-daemon --stop --test --quiet --pidfile $PIDFILE --startas $DAEMON $CONF ; then - $0 restart - fi - ;; - status) - status_of_proc $DAEMON $NAME $CONF && exit 0 || exit $? - ;; - *) - log_success_msg "Usage: /etc/init.d/fancontrol {start|stop|restart|force-reload|status}" - exit 1 - ;; +start) + init + cp $CONF $MAIN_CONF + ;; +install) + install + ;; +*) + log_success_msg "Usage: /etc/init.d/fancontrol {start} | {install}" + exit 1 + ;; esac exit 0 diff --git a/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/thermal_overload_control.sh b/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/thermal_overload_control.sh deleted file mode 100755 index 57fd851f9cfb..000000000000 --- a/platform/broadcom/sonic-platform-modules-cel/dx010/scripts/thermal_overload_control.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# -# Copyright 2020-present Celestica. All Rights Reserved. -# -# This program file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# - -PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin -SETREG_FILE=/sys/devices/platform/dx010_cpld/setreg -TOVERREG=0x140 -CPUOVER=0xa1 -ASICOVER=0xa2 - -prog="$0" -command="$1" - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - exit 1 -fi - -usage() { - echo "Usage: thermal_overload_control.sh [option] " - echo - echo "Options:" - echo " -h, --help : to print this message." - echo - echo "Commands:" - echo - echo " cpu: To enabling CPU thermal overload handler" - echo - echo " asic : To enabling ASIC thermal overload handler" - echo -} - -cpu_overload() { - logger "Enable CPU thermal overload control" - set_reg=`echo ${TOVERREG} ${CPUOVER} > ${SETREG_FILE}` -} - -asic_overload() { - logger "Enable ASIC thermal overload control" - set_reg=`echo ${TOVERREG} ${ASICOVER} > ${SETREG_FILE}` -} - -if [ $# -lt 1 ]; then - usage - exit -1 -fi - -case "$command" in --h | --help) - usage - ;; -cpu) - cpu_overload - ;; -asic) - asic_overload - ;; -*) - usage - exit -1 - ;; -esac - -exit $? diff --git a/platform/broadcom/sonic-platform-modules-cel/services/fancontrol/fancontrol b/platform/broadcom/sonic-platform-modules-cel/services/fancontrol/fancontrol index cdd5005e3688..da32acedd2f9 100755 --- a/platform/broadcom/sonic-platform-modules-cel/services/fancontrol/fancontrol +++ b/platform/broadcom/sonic-platform-modules-cel/services/fancontrol/fancontrol @@ -180,42 +180,6 @@ function LoadConfig } -function CheckFanFault() -{ - let fancount=0 - while (( $fancount < ${#AFCFANFAULT[@]} )) # go through all fan fault. - do - fault=`cat ${AFCFANFAULT[$fancount]}` - if [[ "$fault" == "1" ]] - then - return 1 # fan fault detected - fi - let fancount=$fancount+1 - done - return 0 -} - -function CheckTempOver() -{ - let tempcount=0 - while (( $tempcount < ${#CSTEMP[@]} )) # go through all temp. - do - ctemp=`cat ${CSTEMP[$tempcount]}` - let maxcrit="${CSMAXTEMPCRIT[$tempcount]}*1000" - if [ $ctemp -ge $maxcrit ] - then - logger "Thermal overload : ${CSMAXTEMPTYPE[$tempcount]} temperature ${ctemp} > ${maxcrit}" - if [ -f "$THERMAL_OVERLOAD_CONTROL_FILE" ] - then - toc_cmd="${THERMAL_OVERLOAD_CONTROL_FILE} ${CSMAXTEMPTYPE[$tempcount],,}" - bash $toc_cmd - exit 1 - fi - fi - let tempcount=$tempcount+1 - done - return 0 -} function DevicePath() { @@ -508,18 +472,6 @@ function UpdateFanSpeeds maxpwm=${AFCMAXPWM[$fcvcount]} let tHyst="${AFCTHYST[$fcvcount]}*1000" - #if some fan fault detected all pwm=100% - CheckFanFault - if [ $? -ne 0 ] - then - echo $MAX > $pwmo - let fcvcount=$fcvcount+1 - continue - fi - - #check thermal overload - CheckTempOver - read tval < ${tsens} if [ $? -ne 0 ] then @@ -619,7 +571,7 @@ function UpdateFanSpeeds echo $minsa > $pwmo # Sleep while still handling signals sleep 1 & - wait $! + wait fi fi echo $pwmval > $pwmo # write new value to pwm output @@ -658,5 +610,5 @@ do UpdateFanSpeeds # Sleep while still handling signals sleep $INTERVAL & - wait $! + wait done From 339f8e2c530f38c229625eeb771857da79329eb8 Mon Sep 17 00:00:00 2001 From: Wirut Getbamrung Date: Wed, 2 Dec 2020 15:07:10 +0700 Subject: [PATCH 2/5] [device/celestica]: add thermalctld api support to dx010 device --- .../sonic_platform/chassis.py | 12 +- .../sonic_platform/fan.py | 18 +- .../sonic_platform/thermal.py | 144 +++++++++++---- .../sonic_platform/thermal_actions.py | 79 +++++++++ .../sonic_platform/thermal_conditions.py | 77 ++++++++ .../sonic_platform/thermal_infos.py | 165 ++++++++++++++++++ .../sonic_platform/thermal_manager.py | 46 +++++ .../thermal_overload_control.sh | 75 ++++++++ .../thermal_policy.json | 93 ++++++++++ 9 files changed, 677 insertions(+), 32 deletions(-) create mode 100644 device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py create mode 100644 device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_conditions.py create mode 100644 device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_infos.py create mode 100644 device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_manager.py create mode 100755 device/celestica/x86_64-cel_seastone-r0/thermal_overload_control.sh create mode 100644 device/celestica/x86_64-cel_seastone-r0/thermal_policy.json diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/chassis.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/chassis.py index 5466d629fb2f..da55249fba90 100644 --- a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/chassis.py +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/chassis.py @@ -66,8 +66,9 @@ def __initialize_fan(self): def __initialize_thermals(self): from sonic_platform.thermal import Thermal + airflow = self.__get_air_flow() for index in range(0, NUM_THERMAL): - thermal = Thermal(index) + thermal = Thermal(index, airflow) self._thermal_list.append(thermal) def __initialize_eeprom(self): @@ -80,6 +81,11 @@ def __initialize_components(self): component = Component(index) self._component_list.append(component) + def __get_air_flow(self): + air_flow_path = '/usr/share/sonic/device/{}/fan_airflow'.format(self._api_helper.platform) if self.is_host else '/usr/share/sonic/platform/fan_airflow' + air_flow = self._api_helper.read_one_line_file(air_flow_path) + return air_flow or 'B2F' + def get_base_mac(self): """ Retrieves the base MAC address for the chassis @@ -251,3 +257,7 @@ def get_status(self): A boolean value, True if device is operating properly, False if not """ return True + + def get_thermal_manager(self): + from .thermal_manager import ThermalManager + return ThermalManager diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/fan.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/fan.py index 10875ee30753..bf2b27019b63 100644 --- a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/fan.py +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/fan.py @@ -304,4 +304,20 @@ def get_status(self): Returns: A boolean value, True if device is operating properly, False if not """ - return self.get_presence() and self.get_speed() > 0 + status = 1 + if self.is_psu_fan: + fan_fault_sysfs_name = "fan1_fault" + fan_fault_sysfs_path = self.__search_file_by_name( + self.psu_hwmon_path, fan_fault_sysfs_name) + status = self._api_helper.read_one_line_file(fan_fault_sysfs_path) + + elif self.get_presence(): + chip = self.emc2305_chip_mapping[self.fan_index] + device = chip['device'] + fan_index = chip['index_map'] + sysfs_path = "%s%s/%s" % ( + EMC2305_PATH, device, 'fan{}_fault') + sysfs_path = sysfs_path.format(fan_index[self.fan_tray_index]) + status = self._api_helper.read_one_line_file(sysfs_path) + + return False if int(status) != 0 else True diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal.py index 9e891b65e405..2b38ef94d6c4 100644 --- a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal.py +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal.py @@ -16,46 +16,75 @@ except ImportError as e: raise ImportError(str(e) + "- required module not found") +THERMAL_INFO = { + 0: { + "F2B_max": 50, + "B2F_max": 55, + "postion": "asic", + "name": "Front-panel temp sensor 1", + "i2c_path": "i2c-5/5-0048/hwmon/hwmon1", # u4 system-inlet + }, + 1: { + "F2B_max": 50, + "B2F_max": 55, + "postion": "asic", + "name": "Front-panel temp sensor 2", + "i2c_path": "i2c-6/6-0049/hwmon/hwmon2", # u2 system-inlet + }, + 2: { + "F2B_max": 70, + "F2B_max_crit": 75, + "B2F_max": 60, + "B2F_max_crit": 65, + "postion": "asic", + "name": "ASIC temp sensor", + "i2c_path": "i2c-7/7-004a/hwmon/hwmon3", # u44 bmc56960-on-board + }, + 3: { + "F2B_max": 70, + "F2B_max_crit": 75, + "B2F_max": 70, + "B2F_max_crit": 75, + "postion": "cpu", + "name": "Rear-panel temp sensor 1", + "i2c_path": "i2c-14/14-0048/hwmon/hwmon4", # u9200 cpu-on-board + }, + 4: { + "F2B_max": 70, + "B2F_max": 55, + "postion": "cpu", + "name": "Rear-panel temp sensor 2", + "i2c_path": "i2c-15/15-004e/hwmon/hwmon5" # u9201 system-outlet + } +} +NULL_VAL = "N/A" +I2C_ADAPTER_PATH = "/sys/class/i2c-adapter" + class Thermal(ThermalBase): """Platform-specific Thermal class""" - THERMAL_NAME_LIST = [] - I2C_ADAPTER_PATH = "/sys/class/i2c-adapter" SS_CONFIG_PATH = "/usr/share/sonic/device/x86_64-cel_seastone-r0/sensors.conf" - def __init__(self, thermal_index): + def __init__(self, thermal_index, airflow): self.index = thermal_index self._api_helper = APIHelper() - - # Add thermal name - self.THERMAL_NAME_LIST.append("Front-panel temp sensor 1") - self.THERMAL_NAME_LIST.append("Front-panel temp sensor 2") - self.THERMAL_NAME_LIST.append("ASIC temp sensor") - self.THERMAL_NAME_LIST.append("Rear-panel temp sensor 1") - self.THERMAL_NAME_LIST.append("Rear-panel temp sensor 2") - - # Set hwmon path - i2c_path = { - 0: "i2c-5/5-0048/hwmon/hwmon1", # u4 system-inlet - 1: "i2c-6/6-0049/hwmon/hwmon2", # u2 system-inlet - 2: "i2c-7/7-004a/hwmon/hwmon3", # u44 bmc56960-on-board - 3: "i2c-14/14-0048/hwmon/hwmon4", # u9200 cpu-on-board - 4: "i2c-15/15-004e/hwmon/hwmon5" # u9201 system-outlet - }.get(self.index, None) - - self.hwmon_path = "{}/{}".format(self.I2C_ADAPTER_PATH, i2c_path) - self.ss_key = self.THERMAL_NAME_LIST[self.index] + self._airflow = airflow + self._thermal_info = THERMAL_INFO[self.index] + self._hwmon_path = "{}/{}".format(I2C_ADAPTER_PATH, + self._thermal_info["i2c_path"]) + self.name = self.get_name() + self.postion = self._thermal_info["postion"] self.ss_index = 1 def __get_temp(self, temp_file): - temp_file_path = os.path.join(self.hwmon_path, temp_file) + temp_file_path = os.path.join(self._hwmon_path, temp_file) raw_temp = self._api_helper.read_txt_file(temp_file_path) temp = float(raw_temp)/1000 return float("{:.3f}".format(temp)) def __set_threshold(self, file_name, temperature): - temp_file_path = os.path.join(self.hwmon_path, file_name) + temp_file_path = os.path.join(self._hwmon_path, file_name) try: with open(temp_file_path, 'w') as fd: fd.write(str(temperature)) @@ -80,8 +109,17 @@ def get_high_threshold(self): A float number, the high threshold temperature of thermal in Celsius up to nearest thousandth of one degree Celsius, e.g. 30.125 """ - temp_file = "temp{}_max".format(self.ss_index) - return self.__get_temp(temp_file) + max_crit_key = '{}_max'.format(self._airflow) + return self._thermal_info.get(max_crit_key, None) + + def get_low_threshold(self): + """ + Retrieves the low threshold temperature of thermal + Returns: + A float number, the low threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + return 0.0 def set_high_threshold(self, temperature): """ @@ -102,7 +140,7 @@ def set_high_threshold(self, temperature): f.seek(0) ss_found = False for idx, val in enumerate(content): - if self.ss_key in val: + if self.name in val: ss_found = True elif ss_found and temp_file in val: content[idx] = " set {} {}\n".format( @@ -115,13 +153,43 @@ def set_high_threshold(self, temperature): return is_set & file_set + def set_low_threshold(self, temperature): + """ + Sets the low threshold temperature of thermal + Args : + temperature: A float number up to nearest thousandth of one degree Celsius, + e.g. 30.125 + Returns: + A boolean, True if threshold is set successfully, False if not + """ + return False + + def get_high_critical_threshold(self): + """ + Retrieves the high critical threshold temperature of thermal + Returns: + A float number, the high critical threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + max_crit_key = '{}_max_crit'.format(self._airflow) + return self._thermal_info.get(max_crit_key, None) + + def get_low_critical_threshold(self): + """ + Retrieves the low critical threshold temperature of thermal + Returns: + A float number, the low critical threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + return 0.0 + def get_name(self): """ Retrieves the name of the thermal device Returns: string: The name of the thermal device """ - return self.THERMAL_NAME_LIST[self.index] + return self._thermal_info["name"] def get_presence(self): """ @@ -130,9 +198,25 @@ def get_presence(self): bool: True if PSU is present, False if not """ temp_file = "temp{}_input".format(self.ss_index) - temp_file_path = os.path.join(self.hwmon_path, temp_file) + temp_file_path = os.path.join(self._hwmon_path, temp_file) return os.path.isfile(temp_file_path) + def get_model(self): + """ + Retrieves the model number (or part number) of the device + Returns: + string: Model/part number of device + """ + return NULL_VAL + + def get_serial(self): + """ + Retrieves the serial number of the device + Returns: + string: Serial number of device + """ + return NULL_VAL + def get_status(self): """ Retrieves the operational status of the device @@ -143,7 +227,7 @@ def get_status(self): return False fault_file = "temp{}_fault".format(self.ss_index) - fault_file_path = os.path.join(self.hwmon_path, fault_file) + fault_file_path = os.path.join(self._hwmon_path, fault_file) if not os.path.isfile(fault_file_path): return True diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py new file mode 100644 index 000000000000..15ee7ce3fb26 --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py @@ -0,0 +1,79 @@ + +from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase +from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object +from .thermal_infos import ChassisInfo +from .helper import APIHelper + + +@thermal_json_object('thermal_control.control') +class ControlThermalAlgoAction(ThermalPolicyActionBase): + """ + Action to control the thermal control algorithm + """ + # JSON field definition + JSON_FIELD_STATUS = 'status' + + def __init__(self): + self.status = True + + def load_from_json(self, json_obj): + """ + Construct ControlThermalAlgoAction via JSON. JSON example: + { + "type": "thermal_control.control" + "status": "true" + } + :param json_obj: A JSON object representing a ControlThermalAlgoAction action. + :return: + """ + if ControlThermalAlgoAction.JSON_FIELD_STATUS in json_obj: + status_str = json_obj[ControlThermalAlgoAction.JSON_FIELD_STATUS].lower( + ) + if status_str == 'true': + self.status = True + elif status_str == 'false': + self.status = False + else: + raise ValueError('Invalid {} field value, please specify true of false'. + format(ControlThermalAlgoAction.JSON_FIELD_STATUS)) + else: + raise ValueError('ControlThermalAlgoAction ' + 'missing mandatory field {} in JSON policy file'. + format(ControlThermalAlgoAction.JSON_FIELD_STATUS)) + + def execute(self, thermal_info_dict): + """ + Disable thermal control algorithm + :param thermal_info_dict: A dictionary stores all thermal information. + :return: + """ + if ChassisInfo.INFO_NAME in thermal_info_dict: + chassis_info_obj = thermal_info_dict[ChassisInfo.INFO_NAME] + chassis = chassis_info_obj.get_chassis() + thermal_manager = chassis.get_thermal_manager() + if self.status: + thermal_manager.start_thermal_control_algorithm() + else: + thermal_manager.stop_thermal_control_algorithm() + + +@thermal_json_object('switch.power_cycling') +class SwitchPolicyAction(ThermalPolicyActionBase): + """ + Base class for thermal action. Once all thermal conditions in a thermal policy are matched, + all predefined thermal action will be executed. + """ + + def execute(self, thermal_info_dict): + """ + Take action when thermal condition matches. For example, power cycle the switch. + :param thermal_info_dict: A dictionary stores all thermal information. + :return: + """ + thermal_overload_position_path = '/tmp/thermal_overload_position' + thermal_overload_position = APIHelper().read_one_line_file( + thermal_overload_position_path) + + cmd = 'bash /usr/share/sonic/platform/thermal_overload_control.sh {}'.format( + thermal_overload_position) + APIHelper().run_command(cmd) diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_conditions.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_conditions.py new file mode 100644 index 000000000000..1eee8ea91ab5 --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_conditions.py @@ -0,0 +1,77 @@ +from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase +from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object + + +class FanCondition(ThermalPolicyConditionBase): + def get_fan_info(self, thermal_info_dict): + from .thermal_infos import FanInfo + if FanInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[FanInfo.INFO_NAME], FanInfo): + return thermal_info_dict[FanInfo.INFO_NAME] + else: + return None + + +@thermal_json_object('fan.any.absence') +class AnyFanAbsenceCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_absence_fans()) > 0 if fan_info_obj else False + + +@thermal_json_object('fan.any.fault') +class AnyFanFaultCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_fault_fans()) > 0 if fan_info_obj else False + + +@thermal_json_object('fan.all.presence') +class AllFanPresenceCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_absence_fans()) == 0 if fan_info_obj else False + + +@thermal_json_object('fan.all.good') +class AllFanGoodCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_fault_fans()) == 0 if fan_info_obj else False + + +class ThermalCondition(ThermalPolicyConditionBase): + def get_thermal_info(self, thermal_info_dict): + from .thermal_infos import ThermalInfo + if ThermalInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[ThermalInfo.INFO_NAME], ThermalInfo): + return thermal_info_dict[ThermalInfo.INFO_NAME] + else: + return None + + +@thermal_json_object('thermal.over.high_threshold') +class ThermalOverHighCriticalCondition(ThermalCondition): + def is_match(self, thermal_info_dict): + thermal_info_obj = self.get_thermal_info(thermal_info_dict) + if thermal_info_obj: + return thermal_info_obj.is_over_high_threshold() + else: + return False + + +@thermal_json_object('thermal.over.high_critical_threshold') +class ThermalOverHighCriticalCondition(ThermalCondition): + def is_match(self, thermal_info_dict): + thermal_info_obj = self.get_thermal_info(thermal_info_dict) + if thermal_info_obj: + return thermal_info_obj.is_over_high_critical_threshold() + else: + return False + +@thermal_json_object('thermal.all.good') +class ThermalGoodCondition(ThermalCondition): + def is_match(self, thermal_info_dict): + thermal_info_obj = self.get_thermal_info(thermal_info_dict) + if thermal_info_obj: + return not thermal_info_obj.is_over_threshold() + else: + return False diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_infos.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_infos.py new file mode 100644 index 000000000000..a680b31b634f --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_infos.py @@ -0,0 +1,165 @@ +from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase +from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object +from .helper import APIHelper +import time + + +@thermal_json_object('fan_info') +class FanInfo(ThermalPolicyInfoBase): + """ + Fan information needed by thermal policy + """ + + # Fan information name + INFO_NAME = 'fan_info' + + def __init__(self): + self._absence_fans = set() + self._presence_fans = set() + self._fault_fans = set() + self._status_changed = False + + def collect(self, chassis): + """ + Collect absence and presence fans. + :param chassis: The chassis object + :return: + """ + self._status_changed = False + for fan in chassis.get_all_fans(): + presence = fan.get_presence() + status = fan.get_status() + if presence and fan not in self._presence_fans: + self._presence_fans.add(fan) + self._status_changed = True + if fan in self._absence_fans: + self._absence_fans.remove(fan) + elif not presence and fan not in self._absence_fans: + self._absence_fans.add(fan) + self._status_changed = True + if fan in self._presence_fans: + self._presence_fans.remove(fan) + + if not status and fan not in self._fault_fans: + self._fault_fans.add(fan) + self._status_changed = True + + elif status and fan in self._fault_fans: + self._fault_fans.remove(fan) + self._status_changed = True + + def get_absence_fans(self): + """ + Retrieves absence fans + :return: A set of absence fans + """ + return self._absence_fans + + def get_presence_fans(self): + """ + Retrieves presence fans + :return: A set of presence fans + """ + return self._presence_fans + + def get_fault_fans(self): + """ + Retrieves fault fans + :return: A set of fault fans + """ + return self._fault_fans + + def is_status_changed(self): + """ + Retrieves if the status of fan information changed + :return: True if status changed else False + """ + return self._status_changed + + +@thermal_json_object('thermal_info') +class ThermalInfo(ThermalPolicyInfoBase): + """ + Thermal information needed by thermal policy + """ + + # Fan information name + INFO_NAME = 'thermal_info' + + def collect(self, chassis): + """ + Collect thermal sensor temperature change status + :param chassis: The chassis object + :return: + """ + self._over_high_threshold = False + self._over_high_critical_threshold = False + self._thermal_overload_position = 'cpu' + + # Calculate average temp within the device + temp = 0 + num_of_thermals = chassis.get_num_thermals() + for index in range(num_of_thermals): + thermal = chassis.get_thermal(index) + temp = thermal.get_temperature() + high_threshold = thermal.get_high_threshold() + high_critical_threshold = thermal.get_high_critical_threshold() + + if high_threshold and temp > high_threshold: + self._over_high_threshold = True + + if high_critical_threshold and temp > high_critical_threshold: + self._thermal_overload_position = thermal.postion + self._over_high_critical_threshold = True + + def is_over_threshold(self): + """ + Retrieves if the temperature is over any threshold + :return: True if the temperature is over any threshold else False + """ + return self._over_high_threshold or self._over_high_critical_threshold + + def is_over_high_critical_threshold(self): + """ + Retrieves if the temperature is over high critical threshold + :return: True if the temperature is over high critical threshold else False + """ + thermal_overload_position_path = '/tmp/thermal_overload_position' + if self._over_high_critical_threshold: + APIHelper().write_txt_file(thermal_overload_position_path, + self._thermal_overload_position) + time.sleep(1) + return self._over_high_critical_threshold + + def is_over_high_threshold(self): + """ + Retrieves if the temperature is over high threshold + :return: True if the temperature is over high threshold else False + """ + return self._over_high_threshold + + +@thermal_json_object('chassis_info') +class ChassisInfo(ThermalPolicyInfoBase): + """ + Chassis information needed by thermal policy + """ + INFO_NAME = 'chassis_info' + + def __init__(self): + self._chassis = None + + def collect(self, chassis): + """ + Collect platform chassis. + :param chassis: The chassis object + :return: + """ + self._chassis = chassis + + def get_chassis(self): + """ + Retrieves platform chassis object + :return: A platform chassis object. + """ + return self._chassis diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_manager.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_manager.py new file mode 100644 index 000000000000..9f057cf1f37f --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_manager.py @@ -0,0 +1,46 @@ +from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase +from .helper import APIHelper +from .thermal_actions import * +from .thermal_conditions import * +from .thermal_infos import * + +class ThermalManager(ThermalManagerBase): + FSC_ALGORITHM_CMD = 'service fancontrol {}' + + @classmethod + def start_thermal_control_algorithm(cls): + """ + Start vendor specific thermal control algorithm. The default behavior of this function is a no-op. + :return: + """ + return cls._enable_fancontrol_service(True) + + @classmethod + def stop_thermal_control_algorithm(cls): + """ + Stop thermal control algorithm + Returns: + bool: True if set success, False if fail. + """ + return cls._enable_fancontrol_service(False) + + @classmethod + def deinitialize(cls): + """ + Destroy thermal manager, including any vendor specific cleanup. The default behavior of this function + is a no-op. + :return: + """ + return cls._enable_fancontrol_service(True) + + @classmethod + def _enable_fancontrol_service(cls, enable): + """ + Control thermal by fcs algorithm + Args: + enable: Bool, indicate enable the algorithm or not + Returns: + bool: True if set success, False if fail. + """ + cmd = 'start' if enable else 'stop' + return APIHelper().run_command(cls.FSC_ALGORITHM_CMD.format(cmd)) diff --git a/device/celestica/x86_64-cel_seastone-r0/thermal_overload_control.sh b/device/celestica/x86_64-cel_seastone-r0/thermal_overload_control.sh new file mode 100755 index 000000000000..57fd851f9cfb --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/thermal_overload_control.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# +# Copyright 2020-present Celestica. All Rights Reserved. +# +# This program file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# + +PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin +SETREG_FILE=/sys/devices/platform/dx010_cpld/setreg +TOVERREG=0x140 +CPUOVER=0xa1 +ASICOVER=0xa2 + +prog="$0" +command="$1" + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + exit 1 +fi + +usage() { + echo "Usage: thermal_overload_control.sh [option] " + echo + echo "Options:" + echo " -h, --help : to print this message." + echo + echo "Commands:" + echo + echo " cpu: To enabling CPU thermal overload handler" + echo + echo " asic : To enabling ASIC thermal overload handler" + echo +} + +cpu_overload() { + logger "Enable CPU thermal overload control" + set_reg=`echo ${TOVERREG} ${CPUOVER} > ${SETREG_FILE}` +} + +asic_overload() { + logger "Enable ASIC thermal overload control" + set_reg=`echo ${TOVERREG} ${ASICOVER} > ${SETREG_FILE}` +} + +if [ $# -lt 1 ]; then + usage + exit -1 +fi + +case "$command" in +-h | --help) + usage + ;; +cpu) + cpu_overload + ;; +asic) + asic_overload + ;; +*) + usage + exit -1 + ;; +esac + +exit $? diff --git a/device/celestica/x86_64-cel_seastone-r0/thermal_policy.json b/device/celestica/x86_64-cel_seastone-r0/thermal_policy.json new file mode 100644 index 000000000000..f937b6bd4456 --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/thermal_policy.json @@ -0,0 +1,93 @@ +{ + "thermal_control_algorithm": { + "run_at_boot_up": "true" + }, + "info_types": [ + { + "type": "chassis_info" + }, + { + "type": "fan_info" + }, + { + "type": "thermal_info" + } + ], + "policies": [ + { + "name": "any fan absence", + "conditions": [ + { + "type": "fan.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + } + ] + }, + { + "name": "any fan broken", + "conditions": [ + { + "type": "fan.any.fault" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + } + ] + }, + { + "name": "any thermal over threshold", + "conditions": [ + { + "type": "thermal.over.high_threshold" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + } + ] + }, + { + "name": "temp over high critical threshold", + "conditions": [ + { + "type": "thermal.over.high_critical_threshold" + } + ], + "actions": [ + { + "type": "switch.power_cycling" + } + ] + }, + { + "name": "all fan presence / thermal no warning", + "conditions": [ + { + "type": "fan.all.presence" + }, + { + "type": "fan.all.good" + }, + { + "type": "thermal.all.good" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "true" + } + ] + } + ] +} \ No newline at end of file From 751491bcff252ce8990221e226849e5d01da14fb Mon Sep 17 00:00:00 2001 From: Wirut Getbamrung Date: Wed, 2 Dec 2020 15:12:11 +0700 Subject: [PATCH 3/5] [device/celestica]: remove unused fancontrol config on dx010 device --- device/celestica/x86_64-cel_seastone-r0/fancontrol-B2F | 3 +-- device/celestica/x86_64-cel_seastone-r0/fancontrol-F2B | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/device/celestica/x86_64-cel_seastone-r0/fancontrol-B2F b/device/celestica/x86_64-cel_seastone-r0/fancontrol-B2F index 61b1c386f3d8..f3277db14df2 100644 --- a/device/celestica/x86_64-cel_seastone-r0/fancontrol-B2F +++ b/device/celestica/x86_64-cel_seastone-r0/fancontrol-B2F @@ -9,5 +9,4 @@ MINSTOP=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/ MINPWM=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/pwm5=89 13-004d/pwm1=89 13-004d/pwm2=89 13-004d/pwm3=89 13-004d/pwm4=89 13-004d/pwm5=89 MAXPWM=13-002e/pwm1=255 13-002e/pwm2=255 13-002e/pwm3=255 13-002e/pwm4=255 13-002e/pwm5=255 13-004d/pwm1=255 13-004d/pwm2=255 13-004d/pwm3=255 13-004d/pwm4=255 13-004d/pwm5=255 THYST=13-002e/pwm1=3 13-002e/pwm2=3 13-002e/pwm3=3 13-002e/pwm4=3 13-002e/pwm5=3 13-004d/pwm1=3 13-004d/pwm2=3 13-004d/pwm3=3 13-004d/pwm4=3 13-004d/pwm5=3 -MAXTEMPCRIT=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=65 /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=75 -MAXTEMPTYPE=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=ASIC /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=CPU + diff --git a/device/celestica/x86_64-cel_seastone-r0/fancontrol-F2B b/device/celestica/x86_64-cel_seastone-r0/fancontrol-F2B index b851d0a6d6ca..dc67e2623cc2 100644 --- a/device/celestica/x86_64-cel_seastone-r0/fancontrol-F2B +++ b/device/celestica/x86_64-cel_seastone-r0/fancontrol-F2B @@ -9,5 +9,4 @@ MINSTOP=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/ MINPWM=13-002e/pwm1=89 13-002e/pwm2=89 13-002e/pwm3=89 13-002e/pwm4=89 13-002e/pwm5=89 13-004d/pwm1=89 13-004d/pwm2=89 13-004d/pwm3=89 13-004d/pwm4=89 13-004d/pwm5=89 MAXPWM=13-002e/pwm1=255 13-002e/pwm2=255 13-002e/pwm3=255 13-002e/pwm4=255 13-002e/pwm5=255 13-004d/pwm1=255 13-004d/pwm2=255 13-004d/pwm3=255 13-004d/pwm4=255 13-004d/pwm5=255 THYST=13-002e/pwm1=3 13-002e/pwm2=3 13-002e/pwm3=3 13-002e/pwm4=3 13-002e/pwm5=3 13-004d/pwm1=3 13-004d/pwm2=3 13-004d/pwm3=3 13-004d/pwm4=3 13-004d/pwm5=3 -MAXTEMPCRIT=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=75 /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=75 -MAXTEMPTYPE=/sys/bus/i2c/devices/7-004a/hwmon/hwmon*/temp1_input=ASIC /sys/bus/i2c/devices/14-0048/hwmon/hwmon*/temp1_input=CPU + From ca3aca85fe2c4144e0b089547b1bf930fce17a73 Mon Sep 17 00:00:00 2001 From: Wirut Getbamrung Date: Mon, 21 Dec 2020 15:02:26 +0700 Subject: [PATCH 4/5] [device/celestica]: Add healthd configuration --- .../system_health_monitoring_config.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 device/celestica/x86_64-cel_seastone-r0/system_health_monitoring_config.json diff --git a/device/celestica/x86_64-cel_seastone-r0/system_health_monitoring_config.json b/device/celestica/x86_64-cel_seastone-r0/system_health_monitoring_config.json new file mode 100644 index 000000000000..4dc38d035ab4 --- /dev/null +++ b/device/celestica/x86_64-cel_seastone-r0/system_health_monitoring_config.json @@ -0,0 +1,16 @@ +{ + "services_to_ignore": [], + "devices_to_ignore": [ + "asic", + "psu.temperature", + "PSU2 Fan", + "PSU1 Fan" + ], + "user_defined_checkers": [], + "polling_interval": 60, + "led_color": { + "fault": "orange", + "normal": "green", + "booting": "orange_blink" + } +} \ No newline at end of file From 63ddb29bf99c6b547bc6c06d892757da2aa88451 Mon Sep 17 00:00:00 2001 From: Wirut Getbamrung Date: Fri, 15 Jan 2021 15:27:19 +0700 Subject: [PATCH 5/5] [device/celestica-dx010]: clean unnecessary code --- .../x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py index 15ee7ce3fb26..545db861f683 100644 --- a/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py +++ b/device/celestica/x86_64-cel_seastone-r0/sonic_platform/thermal_actions.py @@ -27,8 +27,7 @@ def load_from_json(self, json_obj): :return: """ if ControlThermalAlgoAction.JSON_FIELD_STATUS in json_obj: - status_str = json_obj[ControlThermalAlgoAction.JSON_FIELD_STATUS].lower( - ) + status_str = json_obj[ControlThermalAlgoAction.JSON_FIELD_STATUS].lower() if status_str == 'true': self.status = True elif status_str == 'false':