Skip to content

Commit 0434b62

Browse files
authored
[sai_failure_dump]Invoking dump during SAI failure (#1198)
* [sai_failure_dump]Invoking dump during SAI failure
1 parent 402eb14 commit 0434b62

8 files changed

+105
-7
lines changed

lib/RedisRemoteSaiInterface.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -2077,6 +2077,7 @@ sai_status_t RedisRemoteSaiInterface::sai_redis_notify_syncd(
20772077
case SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW:
20782078
case SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW:
20792079
case SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC:
2080+
case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
20802081
break;
20812082

20822083
default:
@@ -2118,6 +2119,12 @@ sai_status_t RedisRemoteSaiInterface::sai_redis_notify_syncd(
21182119

21192120
break;
21202121

2122+
case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
2123+
2124+
SWSS_LOG_NOTICE("invoked DUMP succeeded");
2125+
2126+
break;
2127+
21212128
default:
21222129
break;
21232130
}

lib/sairedis.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ typedef enum _sai_redis_notify_syncd_t
5151

5252
SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW,
5353

54-
SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC
54+
SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC,
5555

56+
SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP
5657
} sai_redis_notify_syncd_t;
5758

5859
typedef enum _sai_redis_communication_mode_t
@@ -248,5 +249,4 @@ typedef enum _sai_redis_switch_attr_t
248249
* @default 60000
249250
*/
250251
SAI_REDIS_SWITCH_ATTR_SYNC_OPERATION_RESPONSE_TIMEOUT,
251-
252252
} sai_redis_switch_attr_t;

lib/sairediscommon.h

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define SYNCD_INIT_VIEW "INIT_VIEW"
99
#define SYNCD_APPLY_VIEW "APPLY_VIEW"
1010
#define SYNCD_INSPECT_ASIC "SYNCD_INSPECT_ASIC"
11+
#define SYNCD_INVOKE_DUMP "SYNCD_INVOKE_DUMP"
1112

1213
#define ASIC_STATE_TABLE "ASIC_STATE"
1314
#define TEMP_PREFIX "TEMP_"

meta/SaiSerialize.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -2359,6 +2359,9 @@ std::string sai_serialize(
23592359
case SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC:
23602360
return SYNCD_INSPECT_ASIC;
23612361

2362+
case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
2363+
return SYNCD_INVOKE_DUMP;
2364+
23622365
default:
23632366

23642367
SWSS_LOG_THROW("unknown value on sai_redis_notify_syncd_t: %d", value);
@@ -4543,6 +4546,10 @@ void sai_deserialize(
45434546
{
45444547
value = SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC;
45454548
}
4549+
else if (s == SYNCD_INVOKE_DUMP)
4550+
{
4551+
value = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
4552+
}
45464553
else
45474554
{
45484555
SWSS_LOG_THROW("enum %s not found in sai_redis_notify_syncd_t", s.c_str());

syncd/Syncd.cpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "swss/select.h"
2020
#include "swss/tokenize.h"
2121
#include "swss/notificationproducer.h"
22+
#include "swss/exec.h"
2223

2324
#include "meta/sai_serialize.h"
2425
#include "meta/ZeroMQSelectableChannel.h"
@@ -34,6 +35,7 @@
3435
#include <algorithm>
3536

3637
#define DEF_SAI_WARM_BOOT_DATA_FILE "/var/warmboot/sai-warmboot.bin"
38+
#define SAI_FAILURE_DUMP_SCRIPT "/usr/bin/sai_failure_dump.sh"
3739

3840
using namespace syncd;
3941
using namespace saimeta;
@@ -3247,6 +3249,7 @@ sai_status_t Syncd::processNotifySyncd(
32473249
SWSS_LOG_ENTER();
32483250

32493251
auto& key = kfvKey(kco);
3252+
sai_status_t status = SAI_STATUS_SUCCESS;
32503253

32513254
if (!m_commandLineOptions->m_enableTempView)
32523255
{
@@ -3259,6 +3262,20 @@ sai_status_t Syncd::processNotifySyncd(
32593262

32603263
auto redisNotifySyncd = sai_deserialize_redis_notify_syncd(key);
32613264

3265+
if (redisNotifySyncd == SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP)
3266+
{
3267+
SWSS_LOG_NOTICE("Invoking SAI failure dump");
3268+
std::string ret_str;
3269+
int ret = swss::exec(SAI_FAILURE_DUMP_SCRIPT, ret_str);
3270+
if (ret != 0)
3271+
{
3272+
SWSS_LOG_ERROR("Error in executing SAI failure dump %s", ret_str.c_str());
3273+
status = SAI_STATUS_FAILURE;
3274+
}
3275+
sendNotifyResponse(status);
3276+
return status;
3277+
}
3278+
32623279
if (m_veryFirstRun && m_firstInitWasPerformed && redisNotifySyncd == SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW)
32633280
{
32643281
/*
@@ -3273,7 +3290,6 @@ sai_status_t Syncd::processNotifySyncd(
32733290
{
32743291
SWSS_LOG_NOTICE("very first run is TRUE, op = %s", key.c_str());
32753292

3276-
sai_status_t status = SAI_STATUS_SUCCESS;
32773293

32783294
/*
32793295
* On the very first start of syncd, "compile" view is directly applied
@@ -3348,7 +3364,6 @@ sai_status_t Syncd::processNotifySyncd(
33483364

33493365
SWSS_LOG_WARN("syncd received APPLY VIEW, will translate");
33503366

3351-
sai_status_t status;
33523367

33533368
try
33543369
{

syncd/scripts/sai_failure_dump.sh

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Script for sai failure dump
4+
#
5+
6+
# Source the platform specific dump file
7+
8+
SAI_MAX_FAILURE_DUMPS=10
9+
10+
DUMPDIR=/var/log/sai_failure_dump
11+
12+
if [ -f /usr/bin/platform_syncd_dump.sh ]; then
13+
# Source by absolute path (same path the -f test above checks) so the platform dump runs regardless of the caller's working directory.
. /usr/bin/platform_syncd_dump.sh
14+
fi
15+
16+
if [ -z "$(ls -A $DUMPDIR/)" ]; then
17+
exit 0
18+
fi
19+
20+
# Perform rotation
21+
22+
ls -1td $DUMPDIR/* | tail -n +$(($SAI_MAX_FAILURE_DUMPS+1)) | xargs rm -rf

syncd/scripts/syncd_init_common.sh

+3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ if [[ "$(cat /proc/cmdline)" != *"SONIC_BOOT_TYPE=fast-reboot"* ]]; then
3131
CMD_ARGS+=" -u"
3232
fi
3333

34+
# Create a folder for SAI failure dump files
35+
mkdir -p /var/log/sai_failure_dump/
36+
3437
# Use bulk APIs in SAI
3538
# currently disabled since most vendors don't support that yet
3639
# CMD_ARGS+=" -l"

syncd/tests.cpp

+46-3
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,26 @@ using namespace syncd;
4242
if ((status)!=SAI_STATUS_SUCCESS) \
4343
SWSS_LOG_THROW(format ": %s", ##__VA_ARGS__, sai_serialize_status(status).c_str());
4444

45+
#define SAI_FAILURE_DUMP_SCRIPT "/usr/bin/sai_failure_dump.sh"
46+
47+
#define CHECK_STATUS(x) \
48+
if (status != SAI_STATUS_SUCCESS) { exit(1); }
49+
50+
4551
using namespace saimeta;
52+
53+
std::string mockCallArg;
54+
55+
namespace swss {
56+
int exec(const std::string &cmd, std::string &stdout)
57+
{
58+
SWSS_LOG_ENTER();
59+
60+
mockCallArg=cmd;
61+
return 0;
62+
}
63+
}
64+
4665
static std::shared_ptr<swss::DBConnector> g_db1;
4766

4867
static sai_next_hop_group_api_t test_next_hop_group_api;
@@ -652,9 +671,6 @@ void test_bulk_route_set()
652671
ASSERT_SUCCESS("Failed to bulk remove route entry");
653672
}
654673

655-
#define CHECK_STATUS(x) \
656-
if (status != SAI_STATUS_SUCCESS) { exit(1); }
657-
658674
void syncdThread()
659675
{
660676
SWSS_LOG_ENTER();
@@ -678,6 +694,31 @@ void syncdThread()
678694
syncd->run();
679695
}
680696

697+
void test_invoke_dump()
698+
{
699+
SWSS_LOG_ENTER();
700+
clearDB();
701+
702+
auto syncd = std::make_shared<std::thread>(syncdThread);
703+
syncd->detach();
704+
705+
sai_attribute_t attr;
706+
attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
707+
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
708+
709+
auto sairedis = std::make_shared<sairedis::Sai>();
710+
711+
sai_status_t status = sairedis->initialize(0, &test_services);
712+
713+
CHECK_STATUS(status);
714+
715+
status = sairedis->set(SAI_OBJECT_TYPE_SWITCH, SAI_NULL_OBJECT_ID, &attr);
716+
717+
ASSERT_SUCCESS("Failed to invoke dump");
718+
assert(mockCallArg == SAI_FAILURE_DUMP_SCRIPT);
719+
}
720+
721+
681722
void test_bulk_route_create()
682723
{
683724
SWSS_LOG_ENTER();
@@ -869,6 +910,8 @@ int main()
869910
printf("\n[ %s ]\n\n", sai_serialize_status(SAI_STATUS_SUCCESS).c_str());
870911

871912
test_watchdog_timer_clock_rollback();
913+
914+
test_invoke_dump();
872915
}
873916
catch (const std::exception &e)
874917
{

0 commit comments

Comments
 (0)