From cc768db09a4879e92d41fd804867589d15994fc2 Mon Sep 17 00:00:00 2001 From: vganesan-nokia <67648637+vganesan-nokia@users.noreply.github.com> Date: Thu, 12 Jan 2023 15:31:57 -0500 Subject: [PATCH] [routesync] Fix for stale dynamic neighbor (#2553) * [routesync] Fix for stale dynamic neighbor The changes are for fixing a stale neighbor in the ASIC_DB and data path when eBGP neighbors are shut down and neighbors are flushed. The problem is described in issue: https://github.com/sonic-net/sonic-buildimage/issues/12442 The root cause of this issue is not deleting the route from the ASIC_DB when the route's next hop is on the eth0 or docker0 interface. The solution is to delete the route entry from ASIC_DB instead of just returning when the route's next hop is on the interface eth0 or docker0. This commit fixes the warm restart unit test failure. When the route with only nh on eth0 or docker0 is removed and if the route is the default route, orchagent sends an "add" black hole route to the syncd. So the ASIC DB gets an hset message. When this happens during warm restart, the unit test identifies this as an unwanted setting and the unit test fails. To fix this issue, the route delete is sent only if the warm restart is not in progress. This is done following the same warm restart handling approach used for route delete in other places. 
Signed-off-by: vedganes --- fpmsyncd/routesync.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fpmsyncd/routesync.cpp b/fpmsyncd/routesync.cpp index ca00d4f77d..e87b9fa323 100644 --- a/fpmsyncd/routesync.cpp +++ b/fpmsyncd/routesync.cpp @@ -733,6 +733,32 @@ void RouteSync::onRouteMsg(int nlmsg_type, struct nl_object *obj, char *vrf) { SWSS_LOG_DEBUG("Skip routes to eth0 or docker0: %s %s %s", destipprefix, gw_list.c_str(), intf_list.c_str()); + // If intf_list has only this interface, that means all of the next hops of this route + // have been removed and the next hop on the eth0/docker0 has become the only next hop. + // In this case since we do not want the route with next hop on eth0/docker0, we return. + // But still we need to clear the route from the APPL_DB. Otherwise the APPL_DB and data + // path will be left with stale route entry + if(alsv.size() == 1) + { + if (!warmRestartInProgress) + { + SWSS_LOG_NOTICE("RouteTable del msg for route with only one nh on eth0/docker0: %s %s %s %s", + destipprefix, gw_list.c_str(), intf_list.c_str(), mpls_list.c_str()); + + m_routeTable.del(destipprefix); + } + else + { + SWSS_LOG_NOTICE("Warm-Restart mode: Receiving delete msg for route with only nh on eth0/docker0: %s %s %s %s", + destipprefix, gw_list.c_str(), intf_list.c_str(), mpls_list.c_str()); + + vector fvVector; + const KeyOpFieldsValuesTuple kfv = std::make_tuple(destipprefix, + DEL_COMMAND, + fvVector); + m_warmStartHelper.insertRefreshMap(kfv); + } + } return; } }