Skip to content

Commit e34104e

Browse files
sihuihan88 authored and lguohan committed
[pfcwd]: support BIG_RED_SWITCH mode (#467)
* [pfcwd]: enable BIG_RED_SWITCH mode

  Signed-off-by: Sihui Han <sihan@microsoft.com>

* update as comments
1 parent 1f857d5 commit e34104e

6 files changed

+224
-31
lines changed

orchagent/pfc_detect_broadcom.lua

+2-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@ for i = n, 1, -1 do
2121
local is_deadlock = false
2222
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
2323
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
24-
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
24+
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
25+
if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
2526
local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
2627
if detection_time then
2728
detection_time = tonumber(detection_time)

orchagent/pfc_detect_mellanox.lua

+3-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,9 @@ for i = n, 1, -1 do
2121
local is_deadlock = false
2222
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
2323
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
24-
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
24+
25+
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
26+
if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
2527
local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
2628
if detection_time then
2729
detection_time = tonumber(detection_time)

orchagent/pfc_restore.lua

+2-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@ for i = n, 1, -1 do
2020
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
2121
local restoration_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME')
2222
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
23-
if pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
23+
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
24+
if not big_red_switch_mode and pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
2425
restoration_time = tonumber(restoration_time)
2526
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME_LEFT')
2627
if time_left == nil then

orchagent/pfcactionhandler.cpp

-22
Original file line number | Diff line number | Diff line change
@@ -37,34 +37,12 @@ PfcWdActionHandler::PfcWdActionHandler(sai_object_id_t port, sai_object_id_t que
3737
m_countersTable(countersTable)
3838
{
3939
SWSS_LOG_ENTER();
40-
41-
Port p;
42-
if (!gPortsOrch->getPort(port, p))
43-
{
44-
SWSS_LOG_ERROR("Unknown port id 0x%lx", port);
45-
}
46-
else
47-
{
48-
m_portAlias = p.m_alias;
49-
SWSS_LOG_NOTICE(
50-
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
51-
m_portAlias.c_str(),
52-
m_queueId,
53-
m_queue,
54-
m_port);
55-
}
5640
}
5741

5842
PfcWdActionHandler::~PfcWdActionHandler(void)
5943
{
6044
SWSS_LOG_ENTER();
6145

62-
SWSS_LOG_NOTICE(
63-
"PFC Watchdog storm restored on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
64-
m_portAlias.c_str(),
65-
m_queueId,
66-
m_queue,
67-
m_port);
6846
}
6947

7048
void PfcWdActionHandler::initCounters(void)

orchagent/pfcwdorch.cpp

+207-5
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#define PFC_WD_ACTION "action"
1515
#define PFC_WD_DETECTION_TIME "detection_time"
1616
#define PFC_WD_RESTORATION_TIME "restoration_time"
17+
#define BIG_RED_SWITCH_FIELD "BIG_RED_SWITCH"
1718

1819
#define PFC_WD_DETECTION_TIME_MAX (5 * 1000)
1920
#define PFC_WD_DETECTION_TIME_MIN 100
@@ -261,6 +262,8 @@ template <typename DropHandler, typename ForwardHandler>
261262
void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
262263
const vector<FieldValueTuple>& data)
263264
{
265+
SWSS_LOG_ENTER();
266+
264267
if (key == PFC_WD_GLOBAL)
265268
{
266269
for (auto valuePair: data)
@@ -274,6 +277,11 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
274277
fieldValues.emplace_back(POLL_INTERVAL_FIELD, value);
275278
m_flexCounterGroupTable->set(PFC_WD_FLEX_COUNTER_GROUP, fieldValues);
276279
}
280+
else if (field == BIG_RED_SWITCH_FIELD)
281+
{
282+
SWSS_LOG_NOTICE("Recieve brs mode set, %s", value.c_str());
283+
setBigRedSwitchMode(value);
284+
}
277285
}
278286
}
279287
else
@@ -282,6 +290,166 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
282290
}
283291
}
284292

293+
// Apply a BIG_RED_SWITCH configuration value.
//
// value: the string enable calls enableBigRedSwitchMode(); the string
//        disable calls disableBigRedSwitchMode(). Any other string is only
//        reported with a NOTICE log; no mode change is made in that case.
template <typename DropHandler, typename ForwardHandler>
294+
void PfcWdSwOrch<DropHandler, ForwardHandler>::setBigRedSwitchMode(const string value)
295+
{
296+
SWSS_LOG_ENTER();
297+
298+
if (value == "enable")
299+
{
300+
// When BIG_RED_SWITCH mode is enabled, pfcwd is automatically disabled
301+
enableBigRedSwitchMode();
302+
}
303+
else if (value == "disable")
304+
{
305+
disableBigRedSwitchMode();
306+
}
307+
else
308+
{
309+
// Unrecognized input: log only, leave the current mode untouched.
SWSS_LOG_NOTICE("Unsupported BIG_RED_SWITCH mode set input, please use enable or disable");
310+
}
311+
312+
}
313+
314+
// Turn BIG_RED_SWITCH mode off.
//
// Clears m_bigRedSwitchFlag, then for every queue recorded in m_brsEntryMap:
//   - if a handler is attached, logs the event, commits its counters and
//     releases it;
//   - deletes the BIG_RED_SWITCH_MODE field from that queue's counters key.
// Finally empties m_brsEntryMap.
//
// NOTE(review): only queues present in m_brsEntryMap have the field removed.
// enableBigRedSwitchMode() also writes BIG_RED_SWITCH_MODE for queues that are
// in m_entryMap but have their PFC bit clear, and those queues are not added
// to m_brsEntryMap - confirm whether such queues can exist and, if so, where
// their field gets cleaned up.
template <typename DropHandler, typename ForwardHandler>
315+
void PfcWdSwOrch<DropHandler, ForwardHandler>::disableBigRedSwitchMode()
316+
{
317+
SWSS_LOG_ENTER();
318+
319+
m_bigRedSwitchFlag = false;
320+
// Disable pfcwdaction handler on each queue if exists.
321+
for (auto &entry : m_brsEntryMap)
322+
{
323+
324+
if (entry.second.handler != nullptr)
325+
{
326+
SWSS_LOG_NOTICE(
327+
"PFC Watchdog BIG_RED_SWITCH mode disabled on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
328+
entry.second.portAlias.c_str(),
329+
entry.second.index,
330+
entry.first,
331+
entry.second.portId);
332+
333+
// Flush the handler counters before dropping the last reference to it.
entry.second.handler->commitCounters();
334+
entry.second.handler = nullptr;
335+
}
336+
337+
// Remove the per-queue marker regardless of whether a handler existed.
auto queueId = entry.first;
338+
// NOTE(review): a RedisClient is constructed on every iteration; it does
// not depend on the loop variable and could likely be hoisted - confirm.
RedisClient redisClient(PfcWdOrch<DropHandler, ForwardHandler>::getCountersDb().get());
339+
// Key is built by adjacent-literal concatenation; presumably COUNTERS_TABLE
// is a string-literal macro - TODO confirm.
string countersKey = COUNTERS_TABLE ":" + sai_serialize_object_id(queueId);
340+
redisClient.hdel(countersKey, "BIG_RED_SWITCH_MODE");
341+
}
342+
343+
m_brsEntryMap.clear();
344+
}
345+
346+
// Turn BIG_RED_SWITCH mode on.
//
// Steps, in order:
//   1. Set m_bigRedSwitchFlag.
//   2. For every PHY port, read SAI_PORT_ATTR_PRIORITY_FLOW_CONTROL and write
//      BIG_RED_SWITCH_MODE=enable into the counters table for each queue whose
//      PFC bit is set or which already has an entry in m_entryMap.
//   3. Commit counters of, and release, every handler held in m_entryMap.
//   4. For every PHY port, read the PFC mask again and, for each queue whose
//      PFC bit is set, record an entry in m_brsEntryMap (action
//      PFC_WD_ACTION_DROP) and attach a DropHandler if none is attached yet.
//
// NOTE(review): both port loops return early when get_port_attribute fails.
// At that point m_bigRedSwitchFlag is already true and earlier ports may have
// been processed, so the mode can be left partially applied - confirm this is
// acceptable.
template <typename DropHandler, typename ForwardHandler>
347+
void PfcWdSwOrch<DropHandler, ForwardHandler>::enableBigRedSwitchMode()
348+
{
349+
SWSS_LOG_ENTER();
350+
351+
m_bigRedSwitchFlag = true;
352+
// Write to database that each queue enables BIG_RED_SWITCH
353+
auto allPorts = gPortsOrch->getAllPorts();
354+
// attr.id is set once here and the same attr is reused by both port loops.
sai_attribute_t attr;
355+
attr.id = SAI_PORT_ATTR_PRIORITY_FLOW_CONTROL;
356+
357+
for (auto &it: allPorts)
358+
{
359+
Port port = it.second;
360+
361+
if (port.m_type != Port::PHY)
362+
{
363+
SWSS_LOG_INFO("Skip non-phy port %s", port.m_alias.c_str());
364+
continue;
365+
}
366+
367+
// use portorch api to get lossless tc in future.
368+
sai_status_t status = sai_port_api->get_port_attribute(port.m_port_id, 1, &attr);
369+
if (status != SAI_STATUS_SUCCESS)
370+
{
371+
SWSS_LOG_ERROR("Failed to get PFC mask on port %s: %d", port.m_alias.c_str(), status);
372+
return;
373+
}
374+
375+
// Bit i of the mask is tested below for traffic class / queue index i.
uint8_t pfcMask = attr.value.u8;
376+
for (uint8_t i = 0; i < PFC_WD_TC_MAX; i++)
377+
{
378+
// NOTE(review): assumes m_queue_ids holds at least PFC_WD_TC_MAX
// elements; no bounds check is visible here - TODO confirm.
sai_object_id_t queueId = port.m_queue_ids[i];
379+
// Skip only when the PFC bit is clear AND the queue has no watchdog entry.
if ((pfcMask & (1 << i)) == 0 && m_entryMap.find(queueId) == m_entryMap.end())
380+
{
381+
continue;
382+
}
383+
384+
string queueIdStr = sai_serialize_object_id(queueId);
385+
386+
vector<FieldValueTuple> countersFieldValues;
387+
countersFieldValues.emplace_back("BIG_RED_SWITCH_MODE", "enable");
388+
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable()->set(queueIdStr, countersFieldValues);
389+
}
390+
}
391+
392+
// Disable pfcwdaction handler on each queue if exists.
393+
for (auto & entry: m_entryMap)
394+
{
395+
if (entry.second.handler != nullptr)
396+
{
397+
entry.second.handler->commitCounters();
398+
entry.second.handler = nullptr;
399+
}
400+
}
401+
402+
// Create a pfcwdaction handler on every PFC-enabled queue of each PHY port.
403+
for (auto & it: allPorts)
404+
{
405+
Port port = it.second;
406+
if (port.m_type != Port::PHY)
407+
{
408+
SWSS_LOG_INFO("Skip non-phy port %s", port.m_alias.c_str());
409+
continue;
410+
}
411+
412+
// use portorch api to get lossless tc in future after asym PFC is available.
413+
sai_status_t status = sai_port_api->get_port_attribute(port.m_port_id, 1, &attr);
414+
if (status != SAI_STATUS_SUCCESS)
415+
{
416+
SWSS_LOG_ERROR("Failed to get PFC mask on port %s: %d", port.m_alias.c_str(), status);
417+
return;
418+
}
419+
420+
uint8_t pfcMask = attr.value.u8;
421+
for (uint8_t i = 0; i < PFC_WD_TC_MAX; i++)
422+
{
423+
// Unlike the first loop, only the PFC bit decides here; m_entryMap is not consulted.
if ((pfcMask & (1 << i)) == 0)
424+
{
425+
continue;
426+
}
427+
428+
sai_object_id_t queueId = port.m_queue_ids[i];
429+
// NOTE(review): queueIdStr is not used again inside this loop body.
string queueIdStr = sai_serialize_object_id(queueId);
430+
431+
// emplace returns an iterator to the entry for this queue id; the entry
// is always built with PFC_WD_ACTION_DROP, not the configured action.
auto entry = m_brsEntryMap.emplace(queueId, PfcWdQueueEntry(PfcWdAction::PFC_WD_ACTION_DROP, port.m_port_id, i, port.m_alias)).first;
432+
433+
// Attach a handler only when the entry does not already hold one.
if (entry->second.handler== nullptr)
434+
{
435+
SWSS_LOG_NOTICE(
436+
"PFC Watchdog BIG_RED_SWITCH mode enabled on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
437+
entry->second.portAlias.c_str(),
438+
entry->second.index,
439+
entry->first,
440+
entry->second.portId);
441+
442+
entry->second.handler = make_shared<DropHandler>(
443+
entry->second.portId,
444+
entry->first,
445+
entry->second.index,
446+
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
447+
entry->second.handler->initCounters();
448+
}
449+
}
450+
}
451+
}
452+
285453
template <typename DropHandler, typename ForwardHandler>
286454
void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
287455
uint32_t detectionTime, uint32_t restorationTime, PfcWdAction action)
@@ -355,7 +523,7 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
355523
}
356524

357525
// Create internal entry
358-
m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i));
526+
m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i, port.m_alias));
359527

360528
string key = getFlexCounterTableKey(queueIdStr);
361529
m_flexCounterTable->set(key, queueFieldValues);
@@ -513,10 +681,11 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::~PfcWdSwOrch(void)
513681

514682
template <typename DropHandler, typename ForwardHandler>
515683
PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdQueueEntry::PfcWdQueueEntry(
516-
PfcWdAction action, sai_object_id_t port, uint8_t idx):
684+
PfcWdAction action, sai_object_id_t port, uint8_t idx, string alias):
517685
action(action),
518686
portId(port),
519-
index(idx)
687+
index(idx),
688+
portAlias(alias)
520689
{
521690
SWSS_LOG_ENTER();
522691
}
@@ -564,12 +733,24 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
564733
}
565734

566735
SWSS_LOG_NOTICE("Receive notification, %s", event.c_str());
567-
if (event == "storm")
736+
737+
if (m_bigRedSwitchFlag)
738+
{
739+
SWSS_LOG_NOTICE("Big_RED_SWITCH mode is on, ingore syncd pfc watchdog notification");
740+
}
741+
else if (event == "storm")
568742
{
569743
if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT)
570744
{
571745
if (entry->second.handler == nullptr)
572746
{
747+
SWSS_LOG_NOTICE(
748+
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
749+
entry->second.portAlias.c_str(),
750+
entry->second.index,
751+
entry->first,
752+
entry->second.portId);
753+
573754
entry->second.handler = make_shared<PfcWdActionHandler>(
574755
entry->second.portId,
575756
entry->first,
@@ -582,6 +763,13 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
582763
{
583764
if (entry->second.handler == nullptr)
584765
{
766+
SWSS_LOG_NOTICE(
767+
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
768+
entry->second.portAlias.c_str(),
769+
entry->second.index,
770+
entry->first,
771+
entry->second.portId);
772+
585773
entry->second.handler = make_shared<DropHandler>(
586774
entry->second.portId,
587775
entry->first,
@@ -594,6 +782,13 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
594782
{
595783
if (entry->second.handler == nullptr)
596784
{
785+
SWSS_LOG_NOTICE(
786+
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
787+
entry->second.portAlias.c_str(),
788+
entry->second.index,
789+
entry->first,
790+
entry->second.portId);
791+
597792
entry->second.handler = make_shared<ForwardHandler>(
598793
entry->second.portId,
599794
entry->first,
@@ -604,13 +799,20 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
604799
}
605800
else
606801
{
607-
throw runtime_error("Unknown PFC WD action");
802+
SWSS_LOG_ERROR("Unknown PFC WD action");
608803
}
609804
}
610805
else if (event == "restore")
611806
{
612807
if (entry->second.handler != nullptr)
613808
{
809+
SWSS_LOG_NOTICE(
810+
"PFC Watchdog storm restored on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
811+
entry->second.portAlias.c_str(),
812+
entry->second.index,
813+
entry->first,
814+
entry->second.portId);
815+
614816
entry->second.handler->commitCounters();
615817
entry->second.handler = nullptr;
616818
}

orchagent/pfcwdorch.h

+10-1
Original file line number | Diff line number | Diff line change
@@ -78,11 +78,13 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
7878
PfcWdQueueEntry(
7979
PfcWdAction action,
8080
sai_object_id_t port,
81-
uint8_t idx);
81+
uint8_t idx,
82+
string alias);
8283

8384
PfcWdAction action = PfcWdAction::PFC_WD_ACTION_UNKNOWN;
8485
sai_object_id_t portId = SAI_NULL_OBJECT_ID;
8586
uint8_t index = 0;
87+
string portAlias;
8688
shared_ptr<PfcWdActionHandler> handler = { nullptr };
8789
};
8890

@@ -95,7 +97,13 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
9597

9698
string filterPfcCounters(string counters, set<uint8_t>& losslessTc);
9799
string getFlexCounterTableKey(string s);
100+
101+
void disableBigRedSwitchMode();
102+
void enableBigRedSwitchMode();
103+
void setBigRedSwitchMode(string value);
104+
98105
map<sai_object_id_t, PfcWdQueueEntry> m_entryMap;
106+
map<sai_object_id_t, PfcWdQueueEntry> m_brsEntryMap;
99107

100108
const vector<sai_port_stat_t> c_portStatIds;
101109
const vector<sai_queue_stat_t> c_queueStatIds;
@@ -105,6 +113,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
105113
shared_ptr<ProducerTable> m_flexCounterTable = nullptr;
106114
shared_ptr<ProducerTable> m_flexCounterGroupTable = nullptr;
107115

116+
bool m_bigRedSwitchFlag = false;
108117
int m_pollInterval;
109118
};
110119

0 commit comments

Comments
 (0)