Skip to content

Commit 4edcc4d

Browse files
stepanblyschaklguohan
authored andcommitted
[portsorch] use ingress/egress disable for LAG member enable/disable (sonic-net#1166)
Originally portsorch was designed to remove LAG member from LAG when member gets disabled by teamd. This could lead to potential issues including flood to that port and loops, since after removal member becomes a switch port. Now, portsorch will make use of SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE and SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE to control collection/distribution through that LAG member port. With this new flow, teammgrd and teamsyncd are candidates to be refactored, especially teamsyncd warm reboot logic, since now we don't need to compare old, new lags and lag members. Teamsyncd's job is simply to signal "status" field update without waiting for reconciliation timer to expire. e.g. one port channel went down on peer: ``` admin@arc-switch1025:~$ show interfaces portchannel Flags: A - active, I - inactive, Up - up, Dw - Down, N/A - not available, S - selected, D - deselected, * - not synced No. Team Dev Protocol Ports ----- --------------- ----------- -------------- 0001 PortChannel0001 LACP(A)(Up) Ethernet112(S) 0002 PortChannel0002 LACP(A)(Up) Ethernet116(S) 0003 PortChannel0003 LACP(A)(Up) Ethernet120(S) 0004 PortChannel0004 LACP(A)(Dw) Ethernet124(D) admin@arc-switch1025:~$ docker exec -it syncd sx_api_lag_dump.py LAG Members Table =============================================================================================================== | SWID| LAG Logical Port| LAG Oper State| PVID| Member Port ID| Port Label| Collector| Distributor| Oper State| =============================================================================================================== | 0 0x10000000 UP 1| 0x11900| 29| Enable| Enable| UP| =============================================================================================================== | 0 0x10000100 UP 1| 0x11b00| 30| Enable| Enable| UP| =============================================================================================================== | 0 0x10000200 UP 1| 0x11d00| 31| Enable| Enable| UP| =============================================================================================================== | 0 0x10000300 DOWN 1| 0x11f00| 32| Disable| Disable| UP| =============================================================================================================== ``` Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>
1 parent 15441fe commit 4edcc4d

File tree

3 files changed

+115
-23
lines changed

3 files changed

+115
-23
lines changed

orchagent/portsorch.cpp

+78-16
Original file line numberDiff line numberDiff line change
@@ -2494,39 +2494,51 @@ void PortsOrch::doLagMemberTask(Consumer &consumer)
24942494
status = fvValue(i);
24952495
}
24962496

2497-
/* Sync an enabled member */
2498-
if (status == "enabled")
2497+
if (lag.m_members.find(port_alias) == lag.m_members.end())
24992498
{
2500-
/* Duplicate entry */
2501-
if (lag.m_members.find(port_alias) != lag.m_members.end())
2499+
/* Assert the port doesn't belong to any LAG already */
2500+
assert(!port.m_lag_id && !port.m_lag_member_id);
2501+
2502+
if (!addLagMember(lag, port))
25022503
{
2503-
it = consumer.m_toSync.erase(it);
2504+
it++;
25042505
continue;
25052506
}
2507+
}
25062508

2507-
/* Assert the port doesn't belong to any LAG */
2508-
assert(!port.m_lag_id && !port.m_lag_member_id);
2509-
2510-
if (addLagMember(lag, port))
2509+
/* Sync an enabled member */
2510+
if (status == "enabled")
2511+
{
2512+
/* enable collection first, distribution-only mode
2513+
* is not supported on Mellanox platform
2514+
*/
2515+
if (setCollectionOnLagMember(port, true) &&
2516+
setDistributionOnLagMember(port, true))
2517+
{
25112518
it = consumer.m_toSync.erase(it);
2519+
}
25122520
else
2521+
{
25132522
it++;
2523+
continue;
2524+
}
25142525
}
25152526
/* Sync an disabled member */
25162527
else /* status == "disabled" */
25172528
{
2518-
/* "status" is "disabled" at start when m_lag_id and
2519-
* m_lag_member_id are absent */
2520-
if (!port.m_lag_id || !port.m_lag_member_id)
2529+
/* disable distribution first, distribution-only mode
2530+
* is not supported on Mellanox platform
2531+
*/
2532+
if (setDistributionOnLagMember(port, false) &&
2533+
setCollectionOnLagMember(port, false))
25212534
{
25222535
it = consumer.m_toSync.erase(it);
2523-
continue;
25242536
}
2525-
2526-
if (removeLagMember(lag, port))
2527-
it = consumer.m_toSync.erase(it);
25282537
else
2538+
{
25292539
it++;
2540+
continue;
2541+
}
25302542
}
25312543
}
25322544
/* Remove a LAG member */
@@ -2544,9 +2556,13 @@ void PortsOrch::doLagMemberTask(Consumer &consumer)
25442556
}
25452557

25462558
if (removeLagMember(lag, port))
2559+
{
25472560
it = consumer.m_toSync.erase(it);
2561+
}
25482562
else
2563+
{
25492564
it++;
2565+
}
25502566
}
25512567
else
25522568
{
@@ -3340,6 +3356,52 @@ bool PortsOrch::removeLagMember(Port &lag, Port &port)
33403356
return true;
33413357
}
33423358

3359+
bool PortsOrch::setCollectionOnLagMember(Port &lagMember, bool enableCollection)
3360+
{
3361+
/* Port must be LAG member */
3362+
assert(port.m_lag_member_id);
3363+
3364+
sai_status_t status = SAI_STATUS_FAILURE;
3365+
sai_attribute_t attr {};
3366+
3367+
attr.id = SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE;
3368+
attr.value.booldata = !enableCollection;
3369+
3370+
status = sai_lag_api->set_lag_member_attribute(lagMember.m_lag_member_id, &attr);
3371+
if (status != SAI_STATUS_SUCCESS)
3372+
{
3373+
SWSS_LOG_ERROR("Failed to %s collection on LAG member %s",
3374+
enableCollection ? "enable" : "disable",
3375+
lagMember.m_alias.c_str());
3376+
return false;
3377+
}
3378+
3379+
return true;
3380+
}
3381+
3382+
bool PortsOrch::setDistributionOnLagMember(Port &lagMember, bool enableDistribution)
3383+
{
3384+
/* Port must be LAG member */
3385+
assert(port.m_lag_member_id);
3386+
3387+
sai_status_t status = SAI_STATUS_FAILURE;
3388+
sai_attribute_t attr {};
3389+
3390+
attr.id = SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE;
3391+
attr.value.booldata = !enableDistribution;
3392+
3393+
status = sai_lag_api->set_lag_member_attribute(lagMember.m_lag_member_id, &attr);
3394+
if (status != SAI_STATUS_SUCCESS)
3395+
{
3396+
SWSS_LOG_ERROR("Failed to %s distribution on LAG member %s",
3397+
enableDistribution ? "enable" : "disable",
3398+
lagMember.m_alias.c_str());
3399+
return false;
3400+
}
3401+
3402+
return true;
3403+
}
3404+
33433405
void PortsOrch::generateQueueMap()
33443406
{
33453407
if (m_isQueueMapGenerated)

orchagent/portsorch.h

+2
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ class PortsOrch : public Orch, public Subject
170170
bool removeLag(Port lag);
171171
bool addLagMember(Port &lag, Port &port);
172172
bool removeLagMember(Port &lag, Port &port);
173+
bool setCollectionOnLagMember(Port &lagMember, bool enableCollection);
174+
bool setDistributionOnLagMember(Port &lagMember, bool enableDistribution);
173175
void getLagMember(Port &lag, vector<Port> &portv);
174176

175177
bool addPort(const set<int> &lane_set, uint32_t speed, int an=0, string fec="");

tests/test_portchannel.py

+35-7
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,41 @@ def test_Portchannel(self, dvs, testlog):
3333
assert len(lagms) == 1
3434

3535
(status, fvs) = lagmtbl.get(lagms[0])
36-
for fv in fvs:
37-
if fv[0] == "SAI_LAG_MEMBER_ATTR_LAG_ID":
38-
assert fv[1] == lags[0]
39-
elif fv[0] == "SAI_LAG_MEMBER_ATTR_PORT_ID":
40-
assert dvs.asicdb.portoidmap[fv[1]] == "Ethernet0"
41-
else:
42-
assert False
36+
fvs = dict(fvs)
37+
assert status
38+
assert "SAI_LAG_MEMBER_ATTR_LAG_ID" in fvs
39+
assert fvs.pop("SAI_LAG_MEMBER_ATTR_LAG_ID") == lags[0]
40+
assert "SAI_LAG_MEMBER_ATTR_PORT_ID" in fvs
41+
assert dvs.asicdb.portoidmap[fvs.pop("SAI_LAG_MEMBER_ATTR_PORT_ID")] == "Ethernet0"
42+
assert "SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE" in fvs
43+
assert fvs.pop("SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE") == "false"
44+
assert "SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE" in fvs
45+
assert fvs.pop("SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE") == "false"
46+
assert not fvs
47+
48+
ps = swsscommon.ProducerStateTable(db, "LAG_MEMBER_TABLE")
49+
fvs = swsscommon.FieldValuePairs([("status", "disabled")])
50+
51+
ps.set("PortChannel0001:Ethernet0", fvs)
52+
53+
time.sleep(1)
54+
55+
lagmtbl = swsscommon.Table(asicdb, "ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER")
56+
lagms = lagmtbl.getKeys()
57+
assert len(lagms) == 1
58+
59+
(status, fvs) = lagmtbl.get(lagms[0])
60+
fvs = dict(fvs)
61+
assert status
62+
assert "SAI_LAG_MEMBER_ATTR_LAG_ID" in fvs
63+
assert fvs.pop("SAI_LAG_MEMBER_ATTR_LAG_ID") == lags[0]
64+
assert "SAI_LAG_MEMBER_ATTR_PORT_ID" in fvs
65+
assert dvs.asicdb.portoidmap[fvs.pop("SAI_LAG_MEMBER_ATTR_PORT_ID")] == "Ethernet0"
66+
assert "SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE" in fvs
67+
assert fvs.pop("SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE") == "true"
68+
assert "SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE" in fvs
69+
assert fvs.pop("SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE") == "true"
70+
assert not fvs
4371

4472
# remove port channel member
4573
ps = swsscommon.ProducerStateTable(db, "LAG_MEMBER_TABLE")

0 commit comments

Comments
 (0)