Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert the special handling of RJ45 ports #56

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def get_change_event(self, timeout=0):
# Initialize SFP event first
if not self.sfp_event:
from .sfp_event import sfp_event
self.sfp_event = sfp_event(self.RJ45_port_list)
self.sfp_event = sfp_event()
self.sfp_event.initialize()

wait_for_ever = (timeout == 0)
Expand Down
109 changes: 77 additions & 32 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,44 @@ def __exit__(self, exc_type, exc_val, exc_tb):
deinitialize_sdk_handle(self.sdk_handle)


class SFP(SfpOptoeBase):
class NvidiaSFPCommon(SfpOptoeBase):
def __init__(self, sfp_index):
super(NvidiaSFPCommon, self).__init__()
self.index = sfp_index + 1
self.sdk_index = sfp_index

@property
def sdk_handle(self):
if not SFP.shared_sdk_handle:
SFP.shared_sdk_handle = initialize_sdk_handle()
if not SFP.shared_sdk_handle:
logger.log_error('Failed to open SDK handle')
return SFP.shared_sdk_handle

@classmethod
def _get_module_info(self, sdk_handle, sdk_index):
"""
Get error code of the SFP module

Returns:
The error code fetch from SDK API
"""
module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)

module_id_info = sx_mgmt_module_id_info_t()
module_id_info.slot_id = 0
module_id_info.module_id = sdk_index
sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

rc = sx_mgmt_phy_module_info_get(sdk_handle, module_id_info_list, 1, module_info_list)
assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc)

mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
return mod_info.module_state.oper_state, mod_info.module_state.error_type


class SFP(NvidiaSFPCommon):
"""Platform-specific SFP class"""
shared_sdk_handle = None
SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE = 'Long range for non-Mellanox cable or module'
Expand All @@ -250,13 +287,10 @@ class SFP(SfpOptoeBase):
SFP_MLNX_ERROR_BIT_RESERVED = 0x80000000

def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, lc_name=None):
super(SFP, self).__init__()
super(SFP, self).__init__(sfp_index)
self._sfp_type = sfp_type

if slot_id == 0: # For non-modular chassis
self.index = sfp_index + 1
self.sdk_index = sfp_index

from .thermal import initialize_sfp_thermal
self._thermal_list = initialize_sfp_thermal(sfp_index)
else: # For modular chassis
Expand All @@ -281,13 +315,15 @@ def get_mst_pci_device(self):
logger.log_error("Failed to find mst PCI device rc={} err.msg={}".format(e.returncode, e.output))
return device_name

'''
@property
def sdk_handle(self):
if not SFP.shared_sdk_handle:
SFP.shared_sdk_handle = initialize_sdk_handle()
if not SFP.shared_sdk_handle:
logger.log_error('Failed to open SDK handle')
return SFP.shared_sdk_handle
'''

def reinit(self):
"""
Expand Down Expand Up @@ -670,27 +706,6 @@ def is_replaceable(self):
"""
return True

def _get_error_code(self):
"""
Get error code of the SFP module

Returns:
The error code fetch from SDK API
"""
module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)

module_id_info = sx_mgmt_module_id_info_t()
module_id_info.slot_id = 0
module_id_info.module_id = self.sdk_index
sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

rc = sx_mgmt_phy_module_info_get(self.sdk_handle, module_id_info_list, 1, module_info_list)
assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc)

mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
return mod_info.module_state.oper_state, mod_info.module_state.error_type

@classmethod
def _get_error_description_dict(cls):
return {0: cls.SFP_ERROR_DESCRIPTION_POWER_BUDGET_EXCEEDED,
Expand All @@ -711,12 +726,12 @@ def get_error_description(self):
Get error description

Args:
error_code: The error code returned by _get_error_code
error_code: The error code returned by _get_module_info

Returns:
The error description
"""
oper_status, error_code = self._get_error_code()
oper_status, error_code = self._get_module_info(self.sdk_handle, self.sdk_index)
if oper_status == SX_PORT_MODULE_STATUS_INITIALIZING:
error_description = self.SFP_STATUS_INITIALIZING
elif oper_status == SX_PORT_MODULE_STATUS_PLUGGED:
Expand All @@ -736,13 +751,28 @@ def get_error_description(self):
return error_description


class RJ45Port(SfpOptoeBase):
class RJ45Port(NvidiaSFPCommon):
"""class derived from SFP, representing RJ45 ports"""

def __init__(self, sfp_index):
super(RJ45Port, self).__init__()
super(RJ45Port, self).__init__(sfp_index)
self.sfp_type = RJ45_TYPE

@classmethod
def _get_presence(cls, sdk_handle, sdk_index):
"""Class level method to get low power mode.

Args:
sdk_handle: SDK handle
sdk_index (integer): SDK port index
slot_id (integer): Slot ID

Returns:
[boolean]: True if low power mode is on else off
"""
oper_status, _ = cls._get_module_info(sdk_handle, sdk_index)
return print(oper_status == SX_PORT_MODULE_STATUS_PLUGGED)

def get_presence(self):
"""
Retrieves the presence of the device
Expand All @@ -751,7 +781,22 @@ def get_presence(self):
Returns:
bool: True if device is present, False if not
"""
return True
if utils.is_host():
# To avoid performance issue,
# call class level method to avoid initialize the whole sonic platform API
get_presence_code = 'from sonic_platform import sfp;\n' \
'with sfp.SdkHandleContext() as sdk_handle:' \
'print(sfp.RJ45Port._get_presence(sdk_handle, {}))'.format(self.sdk_index)
presence_cmd = "docker exec pmon python3 -c \"{}\"".format(get_presence_code)
try:
output = subprocess.check_output(presence_cmd, shell=True, universal_newlines=True)
return 'True' in output
except subprocess.CalledProcessError as e:
print("Error! Unable to get presence for {}, rc = {}, err msg: {}".format(self.sdk_index, e.returncode, e.output))
return False
else:
oper_status, _ = self._get_module_info(self.sdk_handle, self.sdk_index);
return (oper_status == SX_PORT_MODULE_STATUS_PLUGGED)

def get_transceiver_info(self):
"""
Expand Down Expand Up @@ -838,7 +883,7 @@ def get_error_description(self):
Get error description

Args:
error_code: The error code returned by _get_error_code
error_code: Always false on SN2201

Returns:
The error description
Expand Down
88 changes: 2 additions & 86 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,6 @@ class MockSxFd(object):
SDK_SFP_STATE_DIS: str(SFP.SFP_STATUS_BIT_REMOVED),
}

"""
RJ45 ports events definition
For RJ45, get_present always returns True.
In case an unplug / unknown event is reported for a port, error status is leveraged for representing them.
The following events will be used.
- Unknown: 2147483648 => 0x80000000
- Unplug: 1073741824 => 0x40000000
According to the error status design, the upper half (bits 16 ~ 31) are the events are encoded from bit 16 (0x0001000),
using those numbers will avoid conflict as much as possible
"""
RJ45_UNPLUG_EVENT = '1073741824'
RJ45_UNKNOWN_EVENT = '2147483648'

# system level event/error
EVENT_ON_ALL_SFP = '-1'
SYSTEM_NOT_READY = 'system_not_ready'
Expand All @@ -147,17 +134,14 @@ class sfp_event:
SX_OPEN_TIMEOUT = 5
SELECT_TIMEOUT = 1

def __init__(self, rj45_port_list=None):
def __init__(self):
self.swid = 0
self.handle = None

# Allocate SDK fd and user channel structures
self.rx_fd_p = new_sx_fd_t_p()
self.user_channel_p = new_sx_user_channel_t_p()

self.RJ45_port_list = rj45_port_list
self.absent_ports_before_init = []

def initialize(self):
swid_cnt_p = None

Expand Down Expand Up @@ -221,30 +205,6 @@ def initialize(self):

if rc != SX_STATUS_SUCCESS:
raise RuntimeError("sx_api_host_ifc_trap_id_register_set failed with rc {}, exiting...".format(rc))

if self.RJ45_port_list:
# Fetch the present state of each RJ45 port.
module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)
absent_port_list = []

for i in self.RJ45_port_list:
module_id_info = sx_mgmt_module_id_info_t()
module_id_info.slot_id = 0
module_id_info.module_id = i
sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

rc = sx_mgmt_phy_module_info_get(self.handle, module_id_info_list, 1, module_info_list)
assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc)

mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
if mod_info.module_state.oper_state not in [SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED]:
absent_port_list.append(i)

self.absent_ports_before_init = absent_port_list

delete_sx_mgmt_module_id_info_t_arr(module_id_info_list)
delete_sx_mgmt_phy_module_info_t_arr(module_info_list)
except Exception as e:
logger.log_error("sfp_event initialization failed due to {}, exiting...".format(repr(e)))
if swid_cnt_p is not None:
Expand Down Expand Up @@ -294,15 +254,6 @@ def check_sfp_status(self, port_change, error_dict, timeout):
to repeat calling it with timeout = 0 in a loop until no new notification read (in this case it returns false).
by doing so all the notifications in the fd can be retrieved through a single call to get_change_event.
"""
if self.absent_ports_before_init:
# This is the first time this method is called.
# We should return the ports that are not present during initialization
for i in self.absent_ports_before_init:
error_dict[i + 1] = 'Not present'
port_change[i + 1] = RJ45_UNPLUG_EVENT
self.absent_ports_before_init = []
return True

found = 0

try:
Expand Down Expand Up @@ -333,16 +284,7 @@ def check_sfp_status(self, port_change, error_dict, timeout):
# 3. and then the sfp module is removed
# 4. sfp_event starts to try fetching the change event
# in this case found is increased so that True will be returned
# For RJ45 ports, we will report "unknown" event anyway since it's legal
reported = 0
if self.RJ45_port_list:
for port in port_list:
if port in self.RJ45_port_list:
port_change[port+1] = sfp_state
reported += 1

if not reported:
logger.log_info("unknown module state {}, maybe the port suffers two adjacent insertion/removal".format(module_state))
logger.log_info("unknown module state {}, maybe the port suffers two adjacent insertion/removal".format(module_state))
found += 1
continue

Expand Down Expand Up @@ -373,32 +315,6 @@ def check_sfp_status(self, port_change, error_dict, timeout):
error_dict[port+1] = error_description
found += 1

if self.RJ45_port_list:
# Translate any plugin/plugout event into error dict for RJ45 ports
unknown_port = set()
unplug_port = set()
for index, event in port_change.items():
if index in self.RJ45_port_list:
# Remove it from port event
# check if it's unknown event
if event == '0': # Remove event
unplug_port.add(index)
elif event != '1':
unknown_port.add(index)
# This is to leverage TRANSCEIVER_STATUS table to represent 'Not present' and 'Unknown' state
# The event should satisfies:
# - Vendor specific error bit
# - Non-blocking bit
# Currently, the 2 MSBs are reserved for this since they are most unlikely to be used in the near future
for index in unknown_port:
# Bit 31 for unknown
port_change[index] = RJ45_UNKNOWN_EVENT
error_dict[index] = 'Unknown'
for index in unplug_port:
# Bit 30 for not present
port_change[index] = RJ45_UNPLUG_EVENT
error_dict[index] = 'Not present'

return found != 0

def on_pmpe(self, fd_p):
Expand Down
3 changes: 2 additions & 1 deletion platform/mellanox/mlnx-platform-api/tests/test_sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def test_sfp_index(self, mock_max_port):
assert sfp.index == 5

@mock.patch('sonic_platform.sfp.SFP.read_eeprom', mock.MagicMock(return_value=None))
@mock.patch('sonic_platform.sfp.SFP._get_error_code')
@mock.patch('sonic_platform.sfp.SFP.shared_sdk_handle', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.sfp.SFP._get_module_info')
@mock.patch('sonic_platform.chassis.Chassis.get_num_sfps', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[]))
def test_sfp_get_error_status(self, mock_get_error_code):
Expand Down
45 changes: 1 addition & 44 deletions platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setup_class(cls):
os.environ["MLNX_PLATFORM_API_UNIT_TESTING"] = "1"

@patch('select.select', MagicMock(return_value=([99], None, None)))
def test_check_sfp_status_xsfp(self):
def test_check_sfp_status(self):
from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT, SDK_SFP_STATE_ERR
from sonic_platform.sfp_event import SDK_ERRORS_TO_ERROR_BITS, SDK_ERRORS_TO_DESCRIPTION, SDK_SFP_BLOCKING_ERRORS

Expand Down Expand Up @@ -59,46 +59,3 @@ def executor(self, mock_module_state, mock_error_type, expect_status, descriptio
if description:
assert 1 in error_dict and error_dict[1] == description
assert 2 in error_dict and error_dict[2] == description

@patch('select.select', MagicMock(return_value=([99], None, None)))
def test_check_sfp_status_rj45(self):
from sonic_platform.sfp_event import sfp_event
from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT
from sonic_platform.sfp_event import RJ45_UNPLUG_EVENT, RJ45_UNKNOWN_EVENT

# Verify absent ports before initialization
event = sfp_event([0, 1, 2, 3])
event.absent_ports_before_init = [0,1]
port_change = {}
error_dict = {}
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {1: RJ45_UNPLUG_EVENT, 2: RJ45_UNPLUG_EVENT}
assert error_dict == {1: 'Not present', 2: 'Not present'}

# Unplug event
event.on_pmpe = MagicMock(return_value=(True, [2], SDK_SFP_STATE_OUT, None))
port_change.clear()
error_dict.clear()
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {3: RJ45_UNPLUG_EVENT}
assert error_dict == {3: 'Not present'}

# Plug event
event.on_pmpe = MagicMock(return_value=(True, [2], SDK_SFP_STATE_IN, None))
port_change.clear()
error_dict.clear()
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {3: str(SDK_SFP_STATE_IN)}
assert error_dict == {}

# Unknown event
event.on_pmpe = MagicMock(return_value=(True, [2], -1, None))
port_change.clear()
error_dict.clear()
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {3: RJ45_UNKNOWN_EVENT}
assert error_dict == {3: 'Unknown'}