From fcc52d535491b2d79c9c346b534294ed03dbc350 Mon Sep 17 00:00:00 2001 From: Vivek Reddy Karri Date: Fri, 11 Aug 2023 00:40:02 +0000 Subject: [PATCH 1/3] Run db_migrator for non first-time reboots Signed-off-by: Vivek Reddy Karri --- files/build_templates/docker_image_ctl.j2 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index 924f0283bad6..aeab13e09bc4 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -259,6 +259,13 @@ function postStartAction() # This flag will be set to "1" after DB migration/initialization is completed as part of config-setup $SONIC_DB_CLI CONFIG_DB SET "CONFIG_DB_INITIALIZED" "0" else + # this is not a first time boot to a new image. Datbase container starts w/ old pre-existing config + if [[ -x /usr/local/bin/db_migrator.py ]]; then + # Migrate the DB to the latest schema version if needed + if [ -z "$DEV" ]; then + /usr/local/bin/db_migrator.py -o migrate + fi + fi # set CONFIG_DB_INITIALIZED to indicate end of config load and migration $SONIC_DB_CLI CONFIG_DB SET "CONFIG_DB_INITIALIZED" "1" fi From 01c8b0275285e6f14c9f88d5881ba2ff41370f3f Mon Sep 17 00:00:00 2001 From: Vivek Reddy Karri Date: Sat, 19 Aug 2023 00:13:28 +0000 Subject: [PATCH 2/3] [nvidia][hsflowd] Fix Dropmon co-operation issues related to HW stop Signed-off-by: Vivek Reddy Karri --- src/sflow/hsflowd/Makefile | 4 + ...-only-start-stop-sw-drops-for-nvidia.patch | 120 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch diff --git a/src/sflow/hsflowd/Makefile b/src/sflow/hsflowd/Makefile index 35d17783f903..a56dc951c4c2 100644 --- a/src/sflow/hsflowd/Makefile +++ b/src/sflow/hsflowd/Makefile @@ -19,6 +19,10 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% : if [[ $(ENABLE_SFLOW_DROPMON) == y ]]; then stg repair stg import -s ../patch/dropmon/series + + if [[ $(CONFIGURED_PLATFORM) == mellanox ]]; then + stg import ../patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch + fi fi mkdir -p debian diff --git a/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch b/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch new file mode 100644 index 000000000000..6bc39d680938 --- /dev/null +++ b/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch @@ -0,0 +1,120 @@ +From 7c99125f690e50aea22d9a11854564a162000687 Mon Sep 17 00:00:00 2001 +From: Vivek Reddy +Date: Sat, 19 Aug 2023 00:01:28 +0000 +Subject: [PATCH] TFrom 5d74a7e54a4bc2964b72f8301d868a9ddadb676c Mon Sep 17 + 00:00:00 2001 Subject: [PATCH] From 8b93f8c395f06d774f05551b5f88758e2521084a + Mon Sep 17 00:00:00 2001 Subject: [PATCH] only start/stop sw drops for + nvidia + +1) start/stop of hw drops is controlled by a different daemon in nvidia +2) Improve logging +3) Only start/stop sw drops if the sw=on in configured through config + +Signed-off-by: vkarri +--- + src/Linux/mod_dropmon.c | 40 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +diff --git a/src/Linux/mod_dropmon.c b/src/Linux/mod_dropmon.c +index e8f26e5..c9e6884 100644 +--- a/src/Linux/mod_dropmon.c ++++ b/src/Linux/mod_dropmon.c +@@ -143,7 +143,7 @@ extern "C" { + static void setState(EVMod *mod, EnumDropmonState newState) { + HSP_mod_DROPMON *mdata = (HSP_mod_DROPMON *)mod->data; + if(newState != mdata->state) { +- myDebug(1, "dropmon state %s -> %s", ++ myLog(LOG_INFO, "dropmon state %s -> %s", + HSPDropmonStateNames[mdata->state], + HSPDropmonStateNames[newState]); + mdata->state = newState; +@@ -410,37 +410,34 @@ That would allow everything to stay on the stack as it does here, which has nice + startIt + ? HSP_DROPMON_STATE_START + : HSP_DROPMON_STATE_STOP); +- ++ + struct nlmsghdr nlh = { }; + struct genlmsghdr ge = { }; +- struct nlattr attr1 = { }; +- struct nlattr attr2 = { }; ++ struct nlattr attr = { }; + +- attr1.nla_len = sizeof(attr1); +- attr1.nla_type = NET_DM_ATTR_SW_DROPS; +- attr2.nla_len = sizeof(attr2); +- attr2.nla_type = NET_DM_ATTR_HW_DROPS; ++ /* sflow should only control start/stop SW_DROPS, HW_DROPS is controlled by a different daemon for NVIDIA */ ++ attr.nla_len = sizeof(attr); ++ attr.nla_type = NET_DM_ATTR_SW_DROPS; + + ge.cmd = startIt + ? NET_DM_CMD_START + : NET_DM_CMD_STOP; + ge.version = 1; + +- nlh.nlmsg_len = NLMSG_LENGTH(sizeof(ge) + sizeof(attr1) + sizeof(attr2)); ++ nlh.nlmsg_len = NLMSG_LENGTH(sizeof(ge) + sizeof(attr)); + nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh.nlmsg_type = mdata->family_id; + nlh.nlmsg_seq = ++mdata->nl_seq; + nlh.nlmsg_pid = UTNLGeneric_pid(mod->id); + +- struct iovec iov[4] = { ++ struct iovec iov[3] = { + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = &ge, .iov_len = sizeof(ge) }, +- { .iov_base = &attr1, .iov_len = sizeof(attr1) }, +- { .iov_base = &attr2, .iov_len = sizeof(attr2) }, ++ { .iov_base = &attr, .iov_len = sizeof(attr) }, + }; + + struct sockaddr_nl sa = { .nl_family = AF_NETLINK }; +- struct msghdr msg = { .msg_name = &sa, .msg_namelen = sizeof(sa), .msg_iov = iov, .msg_iovlen = 4 }; ++ struct msghdr msg = { .msg_name = &sa, .msg_namelen = sizeof(sa), .msg_iov = iov, .msg_iovlen = 3 }; + return sendmsg(mdata->nl_sock, &msg, 0); + } + +@@ -871,9 +868,10 @@ That would allow everything to stay on the stack as it does here, which has nice + mdata->state, + err_msg->error, + strerror(-err_msg->error)); +- if(mdata->state == HSP_DROPMON_STATE_CONFIGURE +- || mdata->state == HSP_DROPMON_STATE_START) ++ if(mdata->state == HSP_DROPMON_STATE_START) + mdata->feedControlErrors++; ++ else if (mdata->state == HSP_DROPMON_STATE_CONFIGURE && err_msg->error == -EBUSY) ++ myLog(LOG_INFO, "Configuring DropMon Failed, Module is already in Monitoring State, Continue..."); + } + break; + } +@@ -944,11 +942,11 @@ That would allow everything to stay on the stack as it does here, which has nice + // TODO: may want to confirm that none of the parameters were + // changed under our feet too? + if(mdata->feedControlErrors > 0) { +- myDebug(1, "dropmon: detected feed-control errors: %u", mdata->feedControlErrors); +- myDebug(1, "dropmon: assume external control - not stopping feed"); ++ myLog(LOG_INFO, "dropmon: detected feed-control errors: %u", mdata->feedControlErrors); ++ myLog(LOG_INFO, "dropmon: assume external control - not stopping feed"); + } +- else { +- myDebug(1, "dropmon: graceful shutdown: turning off feed"); ++ else if(sp->dropmon.sw) { ++ myLog(LOG_INFO, "dropmon: graceful shutdown: turning off feed"); + start_DROPMON(mod, NO); + } + } +@@ -1022,7 +1020,9 @@ That would allow everything to stay on the stack as it does here, which has nice + // failure if the channel was already configured externally. + // TODO: should probably wait for answer before ploughing + // ahead with this start_DROPMON call. +- start_DROPMON(mod, YES); ++ // Configure SW_DROPS only if sp->dropmon.sw == YES ++ if (sp->dropmon.sw) ++ start_DROPMON(mod, YES); + break; + case HSP_DROPMON_STATE_START: + // waiting for start response +-- +2.17.1 + From e7697dca86bd77d081b0f98bebf84e4da7334865 Mon Sep 17 00:00:00 2001 From: Vivek Reddy Karri Date: Sat, 19 Aug 2023 00:58:04 +0000 Subject: [PATCH 3/3] Revert "Run db_migrator for non first-time reboots" This reverts commit fcc52d535491b2d79c9c346b534294ed03dbc350. --- files/build_templates/docker_image_ctl.j2 | 7 --- ...-only-start-stop-sw-drops-for-nvidia.patch | 44 ++++++++++--------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index aeab13e09bc4..924f0283bad6 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -259,13 +259,6 @@ function postStartAction() # This flag will be set to "1" after DB migration/initialization is completed as part of config-setup $SONIC_DB_CLI CONFIG_DB SET "CONFIG_DB_INITIALIZED" "0" else - # this is not a first time boot to a new image. Datbase container starts w/ old pre-existing config - if [[ -x /usr/local/bin/db_migrator.py ]]; then - # Migrate the DB to the latest schema version if needed - if [ -z "$DEV" ]; then - /usr/local/bin/db_migrator.py -o migrate - fi - fi # set CONFIG_DB_INITIALIZED to indicate end of config load and migration $SONIC_DB_CLI CONFIG_DB SET "CONFIG_DB_INITIALIZED" "1" fi diff --git a/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch b/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch index 6bc39d680938..fe7fe9c4f4b9 100644 --- a/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch +++ b/src/sflow/hsflowd/patch/dropmon/0001-only-start-stop-sw-drops-for-nvidia.patch @@ -1,22 +1,19 @@ -From 7c99125f690e50aea22d9a11854564a162000687 Mon Sep 17 00:00:00 2001 +From 891ed872a95bc2a77c14bfe859b89ad3db21b7ad Mon Sep 17 00:00:00 2001 From: Vivek Reddy Date: Sat, 19 Aug 2023 00:01:28 +0000 -Subject: [PATCH] TFrom 5d74a7e54a4bc2964b72f8301d868a9ddadb676c Mon Sep 17 - 00:00:00 2001 Subject: [PATCH] From 8b93f8c395f06d774f05551b5f88758e2521084a - Mon Sep 17 00:00:00 2001 Subject: [PATCH] only start/stop sw drops for - nvidia +Subject: [PATCH] only start/stop sw drops for nvidia -1) start/stop of hw drops is controlled by a different daemon in nvidia +1) Don't start/stop hw drops for nvidia 2) Improve logging -3) Only start/stop sw drops if the sw=on in configured through config +3) Only start/stop sw drops if sw=on -Signed-off-by: vkarri +Signed-off-by: vkarri --- - src/Linux/mod_dropmon.c | 40 ++++++++++++++++++++-------------------- - 1 file changed, 20 insertions(+), 20 deletions(-) + src/Linux/mod_dropmon.c | 39 +++++++++++++++++++++------------------ + 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/Linux/mod_dropmon.c b/src/Linux/mod_dropmon.c -index e8f26e5..c9e6884 100644 +index e8f26e5..869ea0c 100644 --- a/src/Linux/mod_dropmon.c +++ b/src/Linux/mod_dropmon.c @@ -143,7 +143,7 @@ extern "C" { @@ -28,11 +25,20 @@ index e8f26e5..c9e6884 100644 HSPDropmonStateNames[mdata->state], HSPDropmonStateNames[newState]); mdata->state = newState; -@@ -410,37 +410,34 @@ That would allow everything to stay on the stack as it does here, which has nice +@@ -405,42 +405,43 @@ That would allow everything to stay on the stack as it does here, which has nice + + static int start_DROPMON(EVMod *mod, bool startIt) + { ++ HSP *sp = (HSP *)EVROOTDATA(mod); + HSP_mod_DROPMON *mdata = (HSP_mod_DROPMON *)mod->data; + setState(mod, startIt ? HSP_DROPMON_STATE_START : HSP_DROPMON_STATE_STOP); - ++ ++ if (!sp->dropmon.sw) ++ return 0; + struct nlmsghdr nlh = { }; struct genlmsghdr ge = { }; @@ -75,7 +81,7 @@ index e8f26e5..c9e6884 100644 return sendmsg(mdata->nl_sock, &msg, 0); } -@@ -871,9 +868,10 @@ That would allow everything to stay on the stack as it does here, which has nice +@@ -871,9 +872,10 @@ That would allow everything to stay on the stack as it does here, which has nice mdata->state, err_msg->error, strerror(-err_msg->error)); @@ -88,7 +94,7 @@ index e8f26e5..c9e6884 100644 } break; } -@@ -944,11 +942,11 @@ That would allow everything to stay on the stack as it does here, which has nice +@@ -944,11 +946,11 @@ That would allow everything to stay on the stack as it does here, which has nice // TODO: may want to confirm that none of the parameters were // changed under our feet too? if(mdata->feedControlErrors > 0) { @@ -97,24 +103,20 @@ index e8f26e5..c9e6884 100644 + myLog(LOG_INFO, "dropmon: detected feed-control errors: %u", mdata->feedControlErrors); + myLog(LOG_INFO, "dropmon: assume external control - not stopping feed"); } -- else { + else { - myDebug(1, "dropmon: graceful shutdown: turning off feed"); -+ else if(sp->dropmon.sw) { + myLog(LOG_INFO, "dropmon: graceful shutdown: turning off feed"); start_DROPMON(mod, NO); } } -@@ -1022,7 +1020,9 @@ That would allow everything to stay on the stack as it does here, which has nice +@@ -1022,6 +1024,7 @@ That would allow everything to stay on the stack as it does here, which has nice // failure if the channel was already configured externally. // TODO: should probably wait for answer before ploughing // ahead with this start_DROPMON call. -- start_DROPMON(mod, YES); + // Configure SW_DROPS only if sp->dropmon.sw == YES -+ if (sp->dropmon.sw) -+ start_DROPMON(mod, YES); + start_DROPMON(mod, YES); break; case HSP_DROPMON_STATE_START: - // waiting for start response -- 2.17.1