Skip to content

Commit

Permalink
[xcvrd] Fix y_cable state updates from 'failure' to 'unknown' on erro…
Browse files Browse the repository at this point in the history
…r conditions/events (sonic-net#129)

* [xcvrd] Fix y_cable state update to unknown on erroraneous events

This PR provides the support for replacing the state DB updates from 'failure' to 'unknown' in case there is an error event in the functioning of Y cable
What is the motivation for this PR?
the schema agreed upon with linkmgr and orchagent interaction with xcvrd, is that if there is an error event xcvrd need to fill the state DB with 'unknown' as the state value rather than 'failure', this PR handles that

How did you do it?
identified error scenario's in the code and made the changes

Signed-off-by: vaibhav-dahiya <vdahiya@microsoft.com>
  • Loading branch information
vdahiya12 authored Dec 3, 2020
1 parent 5d74356 commit ab39059
Showing 1 changed file with 178 additions and 76 deletions.
254 changes: 178 additions & 76 deletions sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,19 @@
SFP_STATUS_ERR_BAD_CABLE
}

Y_CABLE_STATUS_NO_TOR_ACTIVE = 0
Y_CABLE_STATUS_TORA_ACTIVE = 1
Y_CABLE_STATUS_TORB_ACTIVE = 2

y_cable_switch_state_values = {
Y_CABLE_STATUS_NO_TOR_ACTIVE,
Y_CABLE_STATUS_TORA_ACTIVE,
Y_CABLE_STATUS_TORB_ACTIVE
}

# Find out the underneath physical port list by logical name


def logical_port_name_to_physical_port_list(port_name):
if port_name.startswith("Ethernet"):
if y_cable_platform_sfputil.is_logical_port(port_name):
Expand All @@ -69,123 +81,194 @@ def _wrapper_get_presence(physical_port):
def delete_port_from_y_cable_table(logical_port_name, y_cable_tbl):
y_cable_tbl._del(logical_port_name)

# Delete port from Y cable status table
def delete_port_from_y_cable_command_table(logical_port_name, y_cable_command_tbl):
y_cable_command_tbl._del(logical_port_name)

def update_table_mux_status_for_response_tbl(table_name, status, logical_port_name):
fvs = swsscommon.FieldValuePairs([('response', status)])
table_name.set(logical_port_name, fvs)


def update_table_mux_status_for_statedb_port_tbl(table_name, status, read_side, active_side, logical_port_name):
fvs = swsscommon.FieldValuePairs([('state', status),
('read_side', str(read_side)),
('active_side', str(active_side))])
table_name.set(logical_port_name, fvs)


def y_cable_toggle_mux_torA(physical_port):
update_status = y_cable.toggle_mux_to_torA(physical_port)
if update_status is True:
return 1
else:
helper_logger.log_warning(
"Error: Could not toggle the mux for port {} to torA write eeprom failed".format(physical_port))
return -1


def y_cable_toggle_mux_torB(physical_port):
update_status = y_cable.toggle_mux_to_torB(physical_port)
if update_status is True:
return 2
else:
helper_logger.log_warning(
"Error: Could not toggle the mux for port {} to torB write eeprom failed".format(physical_port))
return -1


def update_tor_active_side(read_side, status, logical_port_name):
def update_tor_active_side(read_side, state, logical_port_name):
physical_port_list = logical_port_name_to_physical_port_list(
logical_port_name)

if len(physical_port_list) == 1:

physical_port = physical_port_list[0]
if int(read_side) == 1:
if status == "active":
y_cable.toggle_mux_to_torA(physical_port)
return 1
elif status == "standby":
y_cable.toggle_mux_to_torB(physical_port)
return 2
elif int(read_side) == 2:
if status == "active":
y_cable.toggle_mux_to_torB(physical_port)
return 2
elif status == "standby":
y_cable.toggle_mux_to_torA(physical_port)
return 1

# TODO: Should we confirm that the mux was indeed toggled?
if _wrapper_get_presence(physical_port):
if int(read_side) == 1:
if state == "active":
return y_cable_toggle_mux_torA(physical_port)
elif state == "standby":
return y_cable_toggle_mux_torB(physical_port)
elif int(read_side) == 2:
if state == "active":
return y_cable_toggle_mux_torB(physical_port)
elif state == "standby":
return y_cable_toggle_mux_torA(physical_port)

# TODO: Should we confirm that the mux was indeed toggled?

else:
helper_logger.log_warning(
"Error: Could not establish presence for Y cable port {} while trying to toggle the mux".format(logical_port_name))
return -1

else:
# Y cable ports should always have
# one to one mapping of physical-to-logical
# This should not happen
helper_logger.log_warning(
"Error: Retreived multiple ports for a Y cable table".format(logical_port_name))
"Error: Retreived multiple ports for a Y cable table port {} while trying to toggle the mux".format(logical_port_name))
return -1


def update_appdb_port_mux_cable_response_table(logical_port_name, asic_index, appl_db):

status = None
y_cable_response_tbl = {}

y_cable_response_tbl[asic_index] = swsscommon.Table(
appl_db[asic_index], "MUX_CABLE_RESPONSE_TABLE")
physical_port_list = logical_port_name_to_physical_port_list(
logical_port_name)

if len(physical_port_list) == 1:

physical_port = physical_port_list[0]
if _wrapper_get_presence(physical_port):
status = None
y_cable_response_tbl = {}
read_side = y_cable.check_read_side(physical_port)
y_cable_response_tbl[asic_index] = swsscommon.Table(
appl_db[asic_index], swsscommon.APP_MUX_CABLE_RESPONSE_TABLE_NAME)

if not read_side:
status = 'failure'

fvs = swsscommon.FieldValuePairs([('status', status)])
y_cable_response_tbl[asic_index].set(logical_port_name, fvs)
read_side = y_cable.check_read_side(physical_port)
if read_side is None:

status = 'unknown'
update_table_mux_status_for_response_tbl(y_cable_response_tbl[asic_index], status, logical_port_name)
helper_logger.log_warning(
"Error: Could not get read side for mux cable port probe failed {}".format(logical_port_name))
"Error: Could not get read side for mux cable port probe command logical port {} and physical port {}".format(logical_port_name, physical_port))
return

active_side = y_cable.check_active_linked_tor_side(physical_port)
if not active_side:
status = 'failure'

fvs = swsscommon.FieldValuePairs([('status', status)])
y_cable_response_tbl[asic_index].set(logical_port_name, fvs)
if active_side is None:

status = 'unknown'
update_table_mux_status_for_response_tbl(y_cable_response_tbl[asic_index], status, logical_port_name)
helper_logger.log_warning(
"Error: Could not get active side for mux cable port probe failed {}".format(logical_port_name))
"Error: Could not get active side for mux cable port probe command logical port {} and physical port {}".format(logical_port_name, physical_port))
return

if read_side == active_side and (active_side == 1 or active_side == 2):
status = 'active'
elif read_side != active_side and (active_side == 1 or active_side == 2):
status = 'standby'
else:
status = 'inactive'

status = 'unknown'
helper_logger.log_warning(
"Error: Could not get state for mux cable port probe command logical port {} and physical port {}".format(logical_port_name, physical_port))

fvs = swsscommon.FieldValuePairs([('status', status)])
update_table_mux_status_for_response_tbl(y_cable_response_tbl[asic_index], status, logical_port_name)

y_cable_response_tbl[asic_index].set(logical_port_name, fvs)
else:

status = 'unknown'
update_table_mux_status_for_response_tbl(y_cable_response_tbl[asic_index], status, logical_port_name)
helper_logger.log_warning(
"Error: Could not establish presence for Y cable port {}".format(logical_port_name))
"Error: Could not establish presence for Y cable port {} while responding to command probe".format(logical_port_name))
else:
# Y cable ports should always have
# one to one mapping of physical-to-logical
# This should not happen

status = 'unknown'
update_table_mux_status_for_response_tbl(y_cable_response_tbl[asic_index], status, logical_port_name)
helper_logger.log_warning(
"Error: Retreived multiple ports for a Y cable port {}".format(logical_port_name))
"Error: Retreived multiple ports for a Y cable port {} while responding to command probe".format(logical_port_name))


def update_statedb_port_mux_status_table(logical_port_name, mux_config_tbl):
def read_y_cable_and_update_statedb_port_tbl(logical_port_name, mux_config_tbl):
physical_port_list = logical_port_name_to_physical_port_list(
logical_port_name)

read_side = None
active_side = None
status = None
if len(physical_port_list) == 1:

physical_port = physical_port_list[0]
if _wrapper_get_presence(physical_port):
read_side = y_cable.check_read_side(physical_port)
if read_side is None:
read_side = active_side = -1
update_table_mux_status_for_statedb_port_tbl(
mux_config_tbl, "unknown", read_side, active_side, logical_port_name)
helper_logger.log_error(
"Error: Could not establish the read side for Y cable port {}".format(logical_port_name))
return

active_side = y_cable.check_active_linked_tor_side(physical_port)
if read_side == active_side:
if active_side is None or active_side not in y_cable_switch_state_values:
read_side = active_side = -1
update_table_mux_status_for_statedb_port_tbl(
mux_config_tbl, "unknown", read_side, active_side, logical_port_name)
helper_logger.log_error(
"Error: Could not establish the active side for Y cable port {}".format(logical_port_name))
return

if read_side == active_side and (active_side == 1 or active_side == 2):
status = 'active'
elif active_side == 0:
status = 'inactive'
else:
elif read_side != active_side and (active_side == 1 or active_side == 2):
status = 'standby'
else:
status = 'unknown'
helper_logger.log_warning(
"Error: Could not establish the active status for Y cable port {}".format(logical_port_name))

update_table_mux_status_for_statedb_port_tbl(
mux_config_tbl, status, read_side, active_side, logical_port_name)
return

fvs = swsscommon.FieldValuePairs([('status', status),
('read_side', str(read_side)),
('active_side', str(active_side))])
mux_config_tbl.set(logical_port_name, fvs)
else:
read_side = active_side = -1
update_table_mux_status_for_statedb_port_tbl(
mux_config_tbl, "unknown", read_side, active_side, logical_port_name)
helper_logger.log_warning(
"Error: Could not establish presence for Y cable port {}".format(logical_port_name))
else:
# Y cable ports should always have
# one to one mapping of physical-to-logical
# This should not happen
read_side = active_side = -1
update_table_mux_status_for_statedb_port_tbl(
mux_config_tbl, "unknown", read_side, active_side, logical_port_name)
helper_logger.log_warning(
"Error: Retreived multiple ports for a Y cable port {}".format(logical_port_name))

Expand All @@ -202,26 +285,29 @@ def check_identifier_presence_and_update_mux_table_entry(state_db, port_tbl, y_c
# Convert list of tuples to a dictionary
mux_table_dict = dict(fvs)
if "mux_cable" in mux_table_dict:
y_cable_asic_table = y_cable_tbl.get(asic_index, None)
if y_cable_presence[0] is True and y_cable_asic_table is not None:
# fill in the newly found entry
update_statedb_port_mux_status_table(
logical_port_name, y_cable_tbl[asic_index])
val = mux_table_dict.get("mux_cable", None)
if val == "true":

else:
# first create the state db y cable table and then fill in the entry
y_cable_presence[:] = [True]
namespaces = multi_asic.get_front_end_namespaces()
for namespace in namespaces:
asic_id = multi_asic.get_asic_index_from_namespace(
namespace)
state_db[asic_id] = daemon_base.db_connect(
"STATE_DB", namespace)
y_cable_tbl[asic_id] = swsscommon.Table(
state_db[asic_id], swsscommon.STATE_HW_MUX_CABLE_TABLE_NAME)
# fill the newly found entry
update_statedb_port_mux_status_table(
logical_port_name, y_cable_tbl[asic_index])
y_cable_asic_table = y_cable_tbl.get(asic_index, None)
if y_cable_presence[0] is True and y_cable_asic_table is not None:
# fill in the newly found entry
read_y_cable_and_update_statedb_port_tbl(
logical_port_name, y_cable_tbl[asic_index])

else:
# first create the state db y cable table and then fill in the entry
y_cable_presence[:] = [True]
namespaces = multi_asic.get_front_end_namespaces()
for namespace in namespaces:
asic_id = multi_asic.get_asic_index_from_namespace(
namespace)
state_db[asic_id] = daemon_base.db_connect(
"STATE_DB", namespace)
y_cable_tbl[asic_id] = swsscommon.Table(
state_db[asic_id], swsscommon.STATE_HW_MUX_CABLE_TABLE_NAME)
# fill the newly found entry
read_y_cable_and_update_statedb_port_tbl(
logical_port_name, y_cable_tbl[asic_index])


def check_identifier_presence_and_delete_mux_table_entry(state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event):
Expand Down Expand Up @@ -402,7 +488,7 @@ def task_worker(self):
# Connect to STATE_DB and APPL_DB and get both the HW_MUX_STATUS_TABLE info
appl_db, state_db, status_tbl, y_cable_tbl = {}, {}, {}, {}
y_cable_tbl_keys = {}
mux_cable_command_tbl = {}
mux_cable_command_tbl, y_cable_command_tbl = {}, {}

sel = swsscommon.Select()

Expand All @@ -416,6 +502,8 @@ def task_worker(self):
appl_db[asic_id], swsscommon.APP_HW_MUX_CABLE_TABLE_NAME)
mux_cable_command_tbl[asic_id] = swsscommon.SubscriberStateTable(
appl_db[asic_id], swsscommon.APP_MUX_CABLE_COMMAND_TABLE_NAME)
y_cable_command_tbl[asic_id] = swsscommon.Table(
appl_db[asic_id], swsscommon.APP_MUX_CABLE_COMMAND_TABLE_NAME)
state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace)
y_cable_tbl[asic_id] = swsscommon.Table(
state_db[asic_id], swsscommon.STATE_HW_MUX_CABLE_TABLE_NAME)
Expand Down Expand Up @@ -454,29 +542,38 @@ def task_worker(self):

fvp_dict = dict(fvp)

if "status" in fvp_dict:
# got a status change
new_status = fvp_dict["status"]
if "state" in fvp_dict:
# got a state change
new_status = fvp_dict["state"]
(status, fvs) = y_cable_tbl[asic_index].get(port)
if status is False:
helper_logger.log_warning("Could not retreive fieldvalue pairs for {}, inside state_db table {}".format(
port, y_cable_tbl[asic_index]))
continue
mux_port_dict = dict(fvs)
old_status = mux_port_dict.get("status")
old_status = mux_port_dict.get("state")
read_side = mux_port_dict.get("read_side")
prev_active_side = mux_port_dict.get("active_side")
# Now if the old_status does not match new_status toggle the mux appropriately
if old_status != new_status:
active_side = update_tor_active_side(
read_side, new_status, port)
fvs_updated = swsscommon.FieldValuePairs([('status', new_status),
if active_side == -1:
new_status = 'unknown'

fvs_updated = swsscommon.FieldValuePairs([('state', new_status),
('read_side',
read_side),
('active_side', str(active_side))])
y_cable_tbl[asic_index].set(port, fvs_updated)
# nothing to do since no status change
else:
# nothing to do since no status change
active_side = prev_active_side
fvs_updated = swsscommon.FieldValuePairs([('state', new_status),
('read_side',
read_side),
('active_side', str(active_side))])
y_cable_tbl[asic_index].set(port, fvs_updated)
helper_logger.log_warning("Got a change event on that does not toggle the TOR active side for port {} status {} active linked side = {} ".format(
port, old_status, prev_active_side))
else:
Expand All @@ -486,14 +583,19 @@ def task_worker(self):
(port_m, op_m, fvp_m) = mux_cable_command_tbl[asic_index].pop()
if fvp_m:

if port_m not in y_cable_tbl_keys[asic_index]:
continue

fvp_dict = dict(fvp_m)

if "command" in fvp_dict:
#check if xcvrd got a probe command

# check if xcvrd got a probe command
probe_identifier = fvp_dict["command"]

if probe_identifier == "probe":
update_appdb_port_mux_cable_response_table(port_m, asic_index, appl_db)
delete_port_from_y_cable_command_table(port_m, y_cable_command_tbl[asic_index])

def task_run(self):
self.task_thread = threading.Thread(target=self.task_worker)
Expand Down

0 comments on commit ab39059

Please sign in to comment.