Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[action] [PR:19473] [Mellanox] implement state machine for always firmware control ports (#19473) #19688

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@
"comex_amb": False,
"pch_temp": True
}
},
'sfp': {
'fw_control_ports': [64, 65] # 0 based sfp index list
}
},
'x86_64-nvidia_sn5600-r0': {
Expand All @@ -134,6 +137,9 @@
"comex_amb": False,
"pch_temp": True
}
},
'sfp': {
'fw_control_ports': [64] # 0 based sfp index list
}
}
}
Expand Down Expand Up @@ -299,3 +305,16 @@ def get_watchdog_max_period(cls):
return DEFAULT_WD_PERIOD

return watchdog_data.get('max_period', None)

@classmethod
@utils.read_only_cache()
def get_always_fw_control_ports(cls):
    """Return the SFP indexes that are always under firmware control.

    Looks up the current platform (``cls.get_platform_name()``) in
    ``DEVICE_DATA`` and reads ``fw_control_ports`` from its ``sfp`` section.
    The table documents that entry as a 0 based sfp index list.

    Returns:
        The ``fw_control_ports`` value, or None when the platform has no
        entry, has no (or an empty) ``sfp`` section, or the section lacks
        the ``fw_control_ports`` key.
    """
    # NOTE(review): the result is wrapped by utils.read_only_cache();
    # presumably it is computed once and reused -- confirm against utils.
    platform_entry = DEVICE_DATA.get(cls.get_platform_name())
    sfp_section = platform_entry.get('sfp') if platform_entry else None
    if not sfp_section:
        return None
    return sfp_section.get('fw_control_ports')
53 changes: 42 additions & 11 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,14 @@
ACTION_ON_FW_CONTROL = 'On Firmware Control'
ACTION_ON_POWER_LIMIT_ERROR = 'On Power Limit Error'
ACTION_ON_CANCEL_WAIT = 'On Cancel Wait'

# States/actions for always firmware control ports
STATE_FCP_DOWN = 'Down(Firmware Control)'
STATE_FCP_INIT = 'Initializing(Firmware Control)'
STATE_FCP_NOT_PRESENT = 'Not Present(Firmware Control)'
STATE_FCP_PRESENT = 'Present(Firmware Control)'

ACTION_FCP_ON_START = 'On Start(Firmware Control)'
# Module host management definitions end

# SFP EEPROM limited bytes
Expand Down Expand Up @@ -463,7 +471,11 @@ def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, l
self.slot_id = slot_id
self._sfp_type_str = None
# SFP state, only applicable for module host management
self.state = STATE_DOWN
fw_control_ports = DeviceDataManager.get_always_fw_control_ports()
if not fw_control_ports or self.sdk_index not in fw_control_ports:
self.state = STATE_DOWN
else:
self.state = STATE_FCP_DOWN

def __str__(self):
return f'SFP {self.sdk_index}'
Expand Down Expand Up @@ -1431,7 +1443,7 @@ def get_state_machine(cls):
sm.add_state(STATE_POWER_LIMIT_ERROR).set_entry_action(ACTION_ON_POWER_LIMIT_ERROR) \
.add_transition(EVENT_POWER_GOOD, STATE_POWERED_ON) \
.add_transition(EVENT_NOT_PRESENT, STATE_NOT_PRESENT)

cls.action_table = {}
cls.action_table[ACTION_ON_START] = cls.action_on_start
cls.action_table[ACTION_ON_RESET] = cls.action_on_reset
Expand All @@ -1441,6 +1453,16 @@ def get_state_machine(cls):
cls.action_table[ACTION_ON_CANCEL_WAIT] = cls.action_on_cancel_wait
cls.action_table[ACTION_ON_POWER_LIMIT_ERROR] = cls.action_on_power_limit_error

# For always firmware control ports
sm.add_state(STATE_FCP_DOWN).add_transition(EVENT_START, STATE_FCP_INIT)
sm.add_state(STATE_FCP_INIT).set_entry_action(ACTION_FCP_ON_START) \
.add_transition(EVENT_NOT_PRESENT, STATE_FCP_NOT_PRESENT) \
.add_transition(EVENT_PRESENT, STATE_FCP_PRESENT)
sm.add_state(STATE_FCP_NOT_PRESENT).add_transition(EVENT_PRESENT, STATE_FCP_PRESENT)
sm.add_state(STATE_FCP_PRESENT).add_transition(EVENT_NOT_PRESENT, STATE_FCP_NOT_PRESENT)

cls.action_table[ACTION_FCP_ON_START] = cls.action_fcp_on_start

cls.sm = sm

return cls.sm
Expand Down Expand Up @@ -1469,6 +1491,14 @@ def action_on_start(cls, sfp):
sfp.on_event(EVENT_RESET)
else:
sfp.on_event(EVENT_POWER_ON)

@classmethod
def action_fcp_on_start(cls, sfp):
    """Entry action registered for ACTION_FCP_ON_START.

    Reads the module's ``present`` sysfs attribute and feeds the matching
    presence event back into the SFP's state machine.

    Args:
        sfp: SFP object whose ``sdk_index`` selects the sysfs module
            directory and whose ``on_event`` receives the resulting event.
    """
    presence_file = f'/sys/module/sx_core/asic0/module{sfp.sdk_index}/present'
    # Any truthy value read from the file is treated as "module present".
    event = EVENT_PRESENT if utils.read_int_from_file(presence_file) else EVENT_NOT_PRESENT
    sfp.on_event(event)

@classmethod
def action_on_reset(cls, sfp):
Expand Down Expand Up @@ -1565,10 +1595,12 @@ def in_stable_state(self):
Returns:
bool: True if the module is in a stable state
"""
return self.state in (STATE_NOT_PRESENT, STATE_SW_CONTROL, STATE_FW_CONTROL, STATE_POWER_BAD, STATE_POWER_LIMIT_ERROR)
return self.state in (STATE_NOT_PRESENT, STATE_SW_CONTROL, STATE_FW_CONTROL,
STATE_POWER_BAD, STATE_POWER_LIMIT_ERROR, STATE_FCP_NOT_PRESENT,
STATE_FCP_PRESENT)

def get_fds_for_poling(self):
if self.state == STATE_FW_CONTROL:
if self.state == STATE_FW_CONTROL or self.state == STATE_FCP_NOT_PRESENT or self.state == STATE_FCP_PRESENT:
return {
'present': self.get_fd('present')
}
Expand All @@ -1584,11 +1616,9 @@ def fill_change_event(self, port_dict):
Args:
port_dict (dict): {<sfp_index>:<sfp_state>}
"""
if self.state == STATE_NOT_PRESENT:
if self.state == STATE_NOT_PRESENT or self.state == STATE_FCP_NOT_PRESENT:
port_dict[self.sdk_index + 1] = SFP_STATUS_REMOVED
elif self.state == STATE_SW_CONTROL:
port_dict[self.sdk_index + 1] = SFP_STATUS_INSERTED
elif self.state == STATE_FW_CONTROL:
elif self.state == STATE_SW_CONTROL or self.state == STATE_FW_CONTROL or self.state == STATE_FCP_PRESENT:
port_dict[self.sdk_index + 1] = SFP_STATUS_INSERTED
elif self.state == STATE_POWER_BAD or self.state == STATE_POWER_LIMIT_ERROR:
sfp_state = SFP.SFP_ERROR_BIT_POWER_BUDGET_EXCEEDED | SFP.SFP_STATUS_BIT_INSERTED
Expand All @@ -1607,7 +1637,7 @@ def refresh_poll_obj(self, poll_obj, all_registered_fds):
# find fds registered by this SFP
current_registered_fds = {item[2]: (fileno, item[1]) for fileno, item in all_registered_fds.items() if item[0] == self.sdk_index}
logger.log_debug(f'SFP {self.sdk_index} registered fds are: {current_registered_fds}')
if self.state == STATE_FW_CONTROL:
if self.state == STATE_FW_CONTROL or self.state == STATE_FCP_NOT_PRESENT or self.state == STATE_FCP_PRESENT:
target_poll_types = ['present']
else:
target_poll_types = ['hw_present', 'power_good']
Expand Down Expand Up @@ -1643,9 +1673,10 @@ def is_dummy_event(self, fd_type, fd_value):
"""
if fd_type == 'hw_present' or fd_type == 'present':
if fd_value == int(SFP_STATUS_INSERTED):
return self.state in (STATE_SW_CONTROL, STATE_FW_CONTROL, STATE_POWER_BAD, STATE_POWER_LIMIT_ERROR)
return self.state in (STATE_SW_CONTROL, STATE_FW_CONTROL, STATE_POWER_BAD,
STATE_POWER_LIMIT_ERROR, STATE_FCP_PRESENT)
elif fd_value == int(SFP_STATUS_REMOVED):
return self.state == STATE_NOT_PRESENT
return self.state in (STATE_NOT_PRESENT, STATE_FCP_NOT_PRESENT)
elif fd_type == 'power_good':
if fd_value == 1:
return self.state in (STATE_SW_CONTROL, STATE_NOT_PRESENT, STATE_RESETTING)
Expand Down
13 changes: 13 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_sfp_sm.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,16 @@ def test_sw_control(self):
s.disable_tx_for_sff_optics = mock.MagicMock()
s.on_event(sfp.EVENT_START)
assert s.get_state() == sfp.STATE_SW_CONTROL

@mock.patch('sonic_platform.device_data.DeviceDataManager.get_always_fw_control_ports', mock.MagicMock(return_value=[0]))
def test_fcp_state(self):
    """Always-firmware-control port settles in the state matching presence.

    With SFP index 0 reported as an always firmware control port, starting
    the state machine must end in STATE_FCP_PRESENT when the mocked
    'present' value is 1 and in STATE_FCP_NOT_PRESENT when it is 0.
    """
    scenarios = (
        (1, sfp.STATE_FCP_PRESENT),
        (0, sfp.STATE_FCP_NOT_PRESENT),
    )
    for present_value, expected_state in scenarios:
        self.mock_value('present', present_value)
        # A fresh SFP per scenario so each run starts from the initial state.
        module = sfp.SFP(0)
        module.on_event(sfp.EVENT_START)
        assert module.get_state() == expected_state

Loading