Skip to content

Commit

Permalink
Enable periodic polling of TRANSCEIVER_FIRMWARE_INFO table in DomInfo…
Browse files Browse the repository at this point in the history
…UpdateTask (#443)

* Enable periodic polling of TRANSCEIVER_FIRMWARE_INFO table in DomInfoUpdateTask

Signed-off-by: Mihir Patel <patelmi@microsoft.com>

* Addressed testcase failure

* Renamed dom_status to dom_polling

---------

Signed-off-by: Mihir Patel <patelmi@microsoft.com>
  • Loading branch information
mihirpat1 authored Mar 6, 2024
1 parent 55b5805 commit 8829614
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 18 deletions.
46 changes: 42 additions & 4 deletions sonic-xcvrd/tests/test_xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,39 @@ def test_CmisManagerTask_task_worker(self, mock_chassis):
assert mock_xcvr_api.tx_disable_channel.call_count == 2
assert task.port_dict['Ethernet0']['cmis_state'] == 'DP_ACTIVATION'

@pytest.mark.parametrize("lport, expected_dom_polling", [
('Ethernet0', 'disabled'),
('Ethernet4', 'disabled'),
('Ethernet8', 'disabled'),
('Ethernet12', 'disabled'),
('Ethernet16', 'enabled'),
('Ethernet20', 'enabled')
])
def test_DomInfoUpdateTask_get_dom_polling_from_config_db(self, lport, expected_dom_polling):
# Define the mock_get function inside the test function
def mock_get(key):
if key in ['Ethernet4', 'Ethernet8', 'Ethernet12', 'Ethernet16']:
return (True, [('dom_polling', 'enabled')])
elif key == 'Ethernet0':
return (True, [('dom_polling', 'disabled')])
else:
return None

port_mapping = PortMapping()
stop_event = threading.Event()
task = DomInfoUpdateTask(DEFAULT_NAMESPACE, port_mapping, stop_event)
task.xcvr_table_helper = XcvrTableHelper(DEFAULT_NAMESPACE)
task.port_mapping.handle_port_change_event(PortChangeEvent('Ethernet4', 1, 0, PortChangeEvent.PORT_ADD))
task.port_mapping.handle_port_change_event(PortChangeEvent('Ethernet12', 1, 0, PortChangeEvent.PORT_ADD))
task.port_mapping.handle_port_change_event(PortChangeEvent('Ethernet8', 1, 0, PortChangeEvent.PORT_ADD))
task.port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD))
task.port_mapping.handle_port_change_event(PortChangeEvent('Ethernet16', 2, 0, PortChangeEvent.PORT_ADD))
cfg_port_tbl = MagicMock()
cfg_port_tbl.get = MagicMock(side_effect=mock_get)
task.xcvr_table_helper.get_cfg_port_tbl = MagicMock(return_value=cfg_port_tbl)

assert task.get_dom_polling_from_config_db(lport) == expected_dom_polling

@patch('xcvrd.xcvrd.XcvrTableHelper', MagicMock())
@patch('xcvrd.xcvrd.delete_port_from_status_table_hw')
def test_DomInfoUpdateTask_handle_port_change_event(self, mock_del_status_tbl_hw):
Expand Down Expand Up @@ -1496,6 +1529,7 @@ def test_DomInfoUpdateTask_task_run_stop(self):

@patch('xcvrd.xcvrd.XcvrTableHelper', MagicMock())
@patch('xcvrd.xcvrd_utilities.sfp_status_helper.detect_port_in_error_status')
@patch('xcvrd.xcvrd.post_port_sfp_firmware_info_to_db')
@patch('xcvrd.xcvrd.post_port_dom_info_to_db')
@patch('swsscommon.swsscommon.Select.addSelectable', MagicMock())
@patch('swsscommon.swsscommon.SubscriberStateTable')
Expand All @@ -1504,10 +1538,10 @@ def test_DomInfoUpdateTask_task_run_stop(self):
@patch('xcvrd.xcvrd.post_port_pm_info_to_db')
def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_status_hw,
mock_select, mock_sub_table,
mock_post_dom_info, mock_detect_error):
mock_post_dom_info, mock_post_firmware_info, mock_detect_error):
mock_selectable = MagicMock()
mock_selectable.pop = MagicMock(
side_effect=[('Ethernet0', swsscommon.SET_COMMAND, (('index', '1'), )), (None, None, None), (None, None, None)])
side_effect=[('Ethernet0', swsscommon.SET_COMMAND, (('index', '1'), )), (None, None, None), (None, None, None), (None, None, None)])
mock_select.return_value = (swsscommon.Select.OBJECT, mock_selectable)
mock_sub_table.return_value = mock_selectable

Expand All @@ -1516,18 +1550,22 @@ def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_stat
task = DomInfoUpdateTask(DEFAULT_NAMESPACE, port_mapping, stop_event)
task.xcvr_table_helper = XcvrTableHelper(DEFAULT_NAMESPACE)
task.task_stopping_event.wait = MagicMock(side_effect=[False, True])
task.get_dom_polling_from_config_db = MagicMock(return_value='enabled')
mock_detect_error.return_value = True
task.task_worker()
assert task.port_mapping.logical_port_list.count('Ethernet0')
assert task.port_mapping.get_asic_id_for_logical_port('Ethernet0') == 0
assert task.port_mapping.get_physical_to_logical(1) == ['Ethernet0']
assert task.port_mapping.get_logical_to_physical('Ethernet0') == [1]
assert mock_post_firmware_info.call_count == 0
assert mock_post_dom_info.call_count == 0
assert mock_update_status_hw.call_count == 0
assert mock_post_pm_info.call_count == 0
mock_detect_error.return_value = False
task.task_stopping_event.wait = MagicMock(side_effect=[False, True])
task.task_stopping_event.wait = MagicMock(side_effect=[False, False, True])
task.get_dom_polling_from_config_db = MagicMock(side_effect=('disabled', 'enabled'))
task.task_worker()
assert mock_post_firmware_info.call_count == 1
assert mock_post_dom_info.call_count == 1
assert mock_update_status_hw.call_count == 1
assert mock_post_pm_info.call_count == 1
Expand Down Expand Up @@ -1727,7 +1765,7 @@ def test_SfpStateUpdateTask_task_worker(self, mock_del_status_hw,
assert mock_update_status.call_count == 1
assert mock_post_sfp_info.call_count == 1
assert mock_post_dom_th.call_count == 1
assert mock_post_firmware_info.call_count == 1
assert mock_post_firmware_info.call_count == 0
assert mock_update_media_setting.call_count == 1

stop_event.is_set = MagicMock(side_effect=[False, True])
Expand Down
79 changes: 65 additions & 14 deletions sonic-xcvrd/xcvrd/xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import traceback
import ctypes

from natsort import natsorted
from sonic_py_common import daemon_base, device_info, logger
from sonic_py_common import multi_asic
from swsscommon import swsscommon
Expand Down Expand Up @@ -521,7 +522,7 @@ def post_port_sfp_info_to_db(logical_port_name, port_mapping, table, transceiver
# Update port sfp firmware info in db

def post_port_sfp_firmware_info_to_db(logical_port_name, port_mapping, table,
stop_event=threading.Event()):
stop_event=threading.Event(), firmware_info_cache=None):
for physical_port, physical_port_name in get_physical_port_name_dict(logical_port_name, port_mapping).items():
if stop_event.is_set():
break
Expand All @@ -530,8 +531,15 @@ def post_port_sfp_firmware_info_to_db(logical_port_name, port_mapping, table,
continue

try:
transceiver_firmware_info_dict = _wrapper_get_transceiver_firmware_info(physical_port)
if transceiver_firmware_info_dict is not None:
if firmware_info_cache is not None and physical_port in firmware_info_cache:
# If cache is enabled and firmware information is in cache, just read from cache, no need read from EEPROM
transceiver_firmware_info_dict = firmware_info_cache[physical_port]
else:
transceiver_firmware_info_dict = _wrapper_get_transceiver_firmware_info(physical_port)
if firmware_info_cache is not None:
# If cache is enabled, put firmware information to cache
firmware_info_cache[physical_port] = transceiver_firmware_info_dict
if transceiver_firmware_info_dict:
fvs = swsscommon.FieldValuePairs([(k, v) for k, v in transceiver_firmware_info_dict.items()])
table.set(physical_port_name, fvs)
else:
Expand Down Expand Up @@ -1580,34 +1588,82 @@ def __init__(self, namespaces, port_mapping, main_thread_stop_event):
self.port_mapping = copy.deepcopy(port_mapping)
self.namespaces = namespaces

def get_dom_polling_from_config_db(self, lport):
"""
Returns the value of dom_polling field from PORT table in CONFIG_DB
For non-breakout ports, this function will get dom_polling field from PORT table of lport (subport = 0)
For breakout ports, this function will get dom_polling field from PORT table of the first subport
of lport's correpsonding breakout group (subport = 1)
Returns:
'disabled' if dom_polling is set to 'disabled', otherwise 'enabled'
"""
dom_polling = 'enabled'

pport_list = self.port_mapping.get_logical_to_physical(lport)
if not pport_list:
helper_logger.log_warning("Get dom disabled: Got unknown physical port list {} for lport {}".format(pport_list, lport))
return dom_polling
pport = pport_list[0]

logical_port_list = self.port_mapping.get_physical_to_logical(pport)
if logical_port_list is None:
helper_logger.log_warning("Get dom disabled: Got unknown FP port index {}".format(pport))
return dom_polling

# Sort the logical port list to make sure we always get the first subport
logical_port_list = natsorted(logical_port_list, key=lambda y: y.lower())
first_logical_port = logical_port_list[0]

asic_index = self.port_mapping.get_asic_id_for_logical_port(first_logical_port)
port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index)

found, port_info = port_tbl.get(first_logical_port)
if found and 'dom_polling' in dict(port_info):
dom_polling = dict(port_info)['dom_polling']

return dom_polling

def is_port_dom_monitoring_disabled(self, logical_port_name):
return self.get_dom_polling_from_config_db(logical_port_name) == 'disabled'

def task_worker(self):
self.xcvr_table_helper = XcvrTableHelper(self.namespaces)
helper_logger.log_info("Start DOM monitoring loop")
firmware_info_cache = {}
dom_info_cache = {}
dom_th_info_cache = {}
transceiver_status_cache = {}
pm_info_cache = {}
sel, asic_context = port_event_helper.subscribe_port_config_change(self.namespaces)

# Start loop to update dom info in DB periodically
while not self.task_stopping_event.wait(DOM_INFO_UPDATE_PERIOD_SECS):
# Clear the cache at the begin of the loop to make sure it will be clear each time
firmware_info_cache.clear()
dom_info_cache.clear()
dom_th_info_cache.clear()
transceiver_status_cache.clear()
pm_info_cache.clear()

# Handle port change event from main thread
port_event_helper.handle_port_config_change(sel, asic_context, self.task_stopping_event, self.port_mapping, helper_logger, self.on_port_config_change)
logical_port_list = self.port_mapping.logical_port_list
for logical_port_name in logical_port_list:
if self.is_port_dom_monitoring_disabled(logical_port_name):
continue

# Get the asic to which this port belongs
asic_index = self.port_mapping.get_asic_id_for_logical_port(logical_port_name)
if asic_index is None:
helper_logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name))
continue

if not sfp_status_helper.detect_port_in_error_status(logical_port_name, self.xcvr_table_helper.get_status_tbl(asic_index)):
try:
post_port_sfp_firmware_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_firmware_info_tbl(asic_index), self.task_stopping_event, firmware_info_cache=firmware_info_cache)
except (KeyError, TypeError) as e:
#continue to process next port since execption could be raised due to port reset, transceiver removal
helper_logger.log_warning("Got exception {} while processing firmware info for port {}, ignored".format(repr(e), logical_port_name))
continue
try:
post_port_dom_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index), self.task_stopping_event, dom_info_cache=dom_info_cache)
except (KeyError, TypeError) as e:
Expand Down Expand Up @@ -1665,16 +1721,16 @@ def on_remove_logical_port(self, port_change_event):
Args:
port_change_event (object): port change event
"""
# To avoid race condition, remove the entry TRANSCEIVER_DOM_SENSOR, TRANSCEIVER_PM and HW section of TRANSCEIVER_STATUS table.
# This thread only updates TRANSCEIVER_DOM_SENSOR, TRANSCEIVER_PM and HW section of TRANSCEIVER_STATUS table,
# so we don't have to remove entries from TRANSCEIVER_INFO, TRANSCEIVER_FIRMWARE_INFO and TRANSCEIVER_DOM_THRESHOLD
# To avoid race condition, remove the entry TRANSCEIVER_FIRMWARE_INFO, TRANSCEIVER_DOM_SENSOR, TRANSCEIVER_PM and HW section of TRANSCEIVER_STATUS table.
# This thread only updates TRANSCEIVER_FIRMWARE_INFO, TRANSCEIVER_DOM_SENSOR, TRANSCEIVER_PM and HW section of TRANSCEIVER_STATUS table,
# so we don't have to remove entries from TRANSCEIVER_INFO and TRANSCEIVER_DOM_THRESHOLD
del_port_sfp_dom_info_from_db(port_change_event.port_name,
self.port_mapping,
None,
self.xcvr_table_helper.get_dom_tbl(port_change_event.asic_id),
None,
self.xcvr_table_helper.get_pm_tbl(port_change_event.asic_id),
None)
self.xcvr_table_helper.get_firmware_info_tbl(port_change_event.asic_id))
delete_port_from_status_table_hw(port_change_event.port_name,
self.port_mapping,
self.xcvr_table_helper.get_status_tbl(port_change_event.asic_id))
Expand Down Expand Up @@ -1752,7 +1808,6 @@ def _post_port_sfp_info_and_dom_thr_to_db_once(self, port_mapping, xcvr_table_he
rc = post_port_sfp_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_intf_tbl(asic_index), transceiver_dict, stop_event)
if rc != SFP_EEPROM_NOT_READY:
post_port_dom_threshold_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_dom_threshold_tbl(asic_index), stop_event)
post_port_sfp_firmware_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_firmware_info_tbl(asic_index), stop_event)

# Do not notify media settings during warm reboot to avoid dataplane traffic impact
if is_warm_start == False:
Expand Down Expand Up @@ -1978,7 +2033,6 @@ def task_worker(self, stopping_event, sfp_error_event):

if rc != SFP_EEPROM_NOT_READY:
post_port_dom_threshold_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index))
post_port_sfp_firmware_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_firmware_info_tbl(asic_index))
media_settings_parser.notify_media_setting(logical_port, transceiver_dict, self.xcvr_table_helper.get_app_port_tbl(asic_index), self.xcvr_table_helper.get_cfg_port_tbl(asic_index), self.port_mapping)
transceiver_dict.clear()
elif value == sfp_status_helper.SFP_STATUS_REMOVED:
Expand Down Expand Up @@ -2147,7 +2201,6 @@ def on_add_logical_port(self, port_change_event):
status_tbl = self.xcvr_table_helper.get_status_tbl(port_change_event.asic_id)
int_tbl = self.xcvr_table_helper.get_intf_tbl(port_change_event.asic_id)
dom_threshold_tbl = self.xcvr_table_helper.get_dom_threshold_tbl(port_change_event.asic_id)
firmware_info_tbl = self.xcvr_table_helper.get_firmware_info_tbl(port_change_event.asic_id)

error_description = 'N/A'
status = None
Expand Down Expand Up @@ -2182,7 +2235,6 @@ def on_add_logical_port(self, port_change_event):
self.retry_eeprom_set.add(port_change_event.port_name)
else:
post_port_dom_threshold_info_to_db(port_change_event.port_name, self.port_mapping, dom_threshold_tbl)
post_port_sfp_firmware_info_to_db(port_change_event.port_name, self.port_mapping, firmware_info_tbl)
media_settings_parser.notify_media_setting(port_change_event.port_name, transceiver_dict, self.xcvr_table_helper.get_app_port_tbl(port_change_event.asic_id), self.xcvr_table_helper.get_cfg_port_tbl(port_change_event.asic_id), self.port_mapping)
else:
status = sfp_status_helper.SFP_STATUS_REMOVED if not status else status
Expand All @@ -2209,7 +2261,6 @@ def retry_eeprom_reading(self):
rc = post_port_sfp_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_intf_tbl(asic_index), transceiver_dict)
if rc != SFP_EEPROM_NOT_READY:
post_port_dom_threshold_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index))
post_port_sfp_firmware_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_firmware_info_tbl(asic_index))
media_settings_parser.notify_media_setting(logical_port, transceiver_dict, self.xcvr_table_helper.get_app_port_tbl(asic_index), self.xcvr_table_helper.get_cfg_port_tbl(asic_index), self.port_mapping)
transceiver_dict.clear()
retry_success_set.add(logical_port)
Expand Down

0 comments on commit 8829614

Please sign in to comment.