From b34f783b530e21394f877ac9ff2e6df3f96abf47 Mon Sep 17 00:00:00 2001 From: Shi Su <67605788+shi-su@users.noreply.github.com> Date: Tue, 20 Apr 2021 14:09:34 -0700 Subject: [PATCH] Resolve neighbor when nexthop does not exist (#1704) What I did Resolve neighbor entries if nexthop does not exist when adding routes. Why I did it In some scenarios, such as adding static routes, the nexthop may not be available, this PR would trigger an attempt to resolve the neighbor. Note that routeorch would only trigger resolution of a neighbor once even if it fails. The arp_update script would find the unresolved neighbor and retry neighbor resolution. --- cfgmgr/nbrmgr.cpp | 7 + orchagent/neighorch.cpp | 25 +++ orchagent/neighorch.h | 8 +- orchagent/routeorch.cpp | 10 +- tests/test_route.py | 314 ++++++++++++++++++++++++++++++++++++++ tests/test_warm_reboot.py | 10 +- 6 files changed, 368 insertions(+), 6 deletions(-) diff --git a/cfgmgr/nbrmgr.cpp b/cfgmgr/nbrmgr.cpp index ad10fef9f4a8..39d8edf9b08c 100644 --- a/cfgmgr/nbrmgr.cpp +++ b/cfgmgr/nbrmgr.cpp @@ -234,6 +234,13 @@ void NbrMgr::doResolveNeighTask(Consumer &consumer) while (it != consumer.m_toSync.end()) { KeyOpFieldsValuesTuple t = it->second; + if (kfvOp(t) == DEL_COMMAND) + { + SWSS_LOG_INFO("Received DEL operation for %s, skipping", kfvKey(t).c_str()); + it = consumer.m_toSync.erase(it); + continue; + } + vector keys = parseAliasIp(kfvKey(t), consumer.getConsumerTable()->getTableNameSeparator().c_str()); MacAddress mac; diff --git a/orchagent/neighorch.cpp b/orchagent/neighorch.cpp index 9a2f76f571e5..c7c18b3e31e6 100644 --- a/orchagent/neighorch.cpp +++ b/orchagent/neighorch.cpp @@ -73,6 +73,25 @@ bool NeighOrch::resolveNeighborEntry(const NeighborEntry &entry, const MacAddres return true; } +void NeighOrch::resolveNeighbor(const NeighborEntry &entry) +{ + if (m_neighborToResolve.find(entry) == m_neighborToResolve.end()) // TODO: Allow retry for unresolved neighbors + { + resolveNeighborEntry(entry, MacAddress()); + m_neighborToResolve.insert(entry); + } + + return; +} + +void NeighOrch::clearResolvedNeighborEntry(const NeighborEntry &entry) +{ + string key, alias = entry.alias; + key = alias + ":" + entry.ip_address.to_string(); + m_appNeighResolveProducer.del(key); + return; +} + /* * Function Name: processFDBFlushUpdate * Description: @@ -214,6 +233,12 @@ bool NeighOrch::addNextHop(const IpAddress &ipAddress, const string &alias) SWSS_LOG_NOTICE("Created next hop %s on %s", ipAddress.to_string().c_str(), alias.c_str()); + if (m_neighborToResolve.find(nexthop) != m_neighborToResolve.end()) + { + clearResolvedNeighborEntry(nexthop); + m_neighborToResolve.erase(nexthop); + SWSS_LOG_INFO("Resolved neighbor for %s", nexthop.to_string().c_str()); + } NextHopEntry next_hop_entry; next_hop_entry.next_hop_id = next_hop_id; diff --git a/orchagent/neighorch.h b/orchagent/neighorch.h index 60981e9f11c2..8cd68f036161 100644 --- a/orchagent/neighorch.h +++ b/orchagent/neighorch.h @@ -74,6 +74,8 @@ class NeighOrch : public Orch, public Subject, public Observer bool addInbandNeighbor(string alias, IpAddress ip_address); bool delInbandNeighbor(string alias, IpAddress ip_address); + void resolveNeighbor(const NeighborEntry &); + private: PortsOrch *m_portsOrch; IntfsOrch *m_intfsOrch; @@ -83,6 +85,8 @@ class NeighOrch : public Orch, public Subject, public Observer NeighborTable m_syncdNeighbors; NextHopTable m_syncdNextHops; + std::set m_neighborToResolve; + bool addNextHop(const IpAddress&, const string&); bool removeNextHop(const IpAddress&, const string&); @@ -93,7 +97,6 @@ class NeighOrch : public Orch, public Subject, public Observer bool clearNextHopFlag(const NextHopKey &, const uint32_t); void processFDBFlushUpdate(const FdbFlushUpdate &); - bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &); void doTask(Consumer &consumer); void doVoqSystemNeighTask(Consumer &consumer); @@ -104,6 +107,9 @@ class NeighOrch : public Orch, public Subject, public Observer bool addVoqEncapIndex(string &alias, IpAddress &ip, vector &neighbor_attrs); void voqSyncAddNeigh(string &alias, IpAddress &ip_address, const MacAddress &mac, sai_neighbor_entry_t &neighbor_entry); void voqSyncDelNeigh(string &alias, IpAddress &ip_address); + + bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &); + void clearResolvedNeighborEntry(const NeighborEntry &); }; #endif /* SWSS_NEIGHORCH_H */ diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index ca3feec9c3bd..0f7209c3614b 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -1436,8 +1436,9 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) } else { - SWSS_LOG_INFO("Failed to get next hop %s for %s", + SWSS_LOG_INFO("Failed to get next hop %s for %s, resolving neighbor", nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + m_neighOrch->resolveNeighbor(nexthop); return false; } } @@ -1484,8 +1485,15 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) return false; } } + else + { + SWSS_LOG_INFO("Failed to get next hop %s in %s, resolving neighbor", + nextHop.to_string().c_str(), nextHops.to_string().c_str()); + m_neighOrch->resolveNeighbor(nextHop); + } } } + /* Failed to create the next hop group and check if a temporary route is needed */ /* If the current next hop is part of the next hop group to sync, diff --git a/tests/test_route.py b/tests/test_route.py index e22ead135f03..341ef53bbd81 100644 --- a/tests/test_route.py +++ b/tests/test_route.py @@ -233,6 +233,320 @@ def test_RouteAddRemoveIpv6Route(self, dvs, testlog): dvs.servers[1].runcmd("ip -6 route del default dev eth0") dvs.servers[1].runcmd("ip -6 address del 2001::2/64 dev eth0") + def test_RouteAddRemoveIpv4RouteResolveNeigh(self, dvs, testlog): + self.setup_db(dvs) + + self.clear_srv_config(dvs) + + # create l3 interface + self.create_l3_intf("Ethernet0", "") + self.create_l3_intf("Ethernet4", "") + + # set ip address + self.add_ip_address("Ethernet0", "10.0.0.0/31") + self.add_ip_address("Ethernet4", "10.0.0.2/31") + + # bring up interface + self.set_admin_status("Ethernet0", "up") + self.set_admin_status("Ethernet4", "up") + + # set ip address and default route + dvs.servers[0].runcmd("ip address add 10.0.0.1/31 dev eth0") + dvs.servers[0].runcmd("ip route add default via 10.0.0.0") + + dvs.servers[1].runcmd("ip address add 10.0.0.3/31 dev eth0") + dvs.servers[1].runcmd("ip route add default via 10.0.0.2") + time.sleep(2) + + # add route entry -- single nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 2.2.2.0/24 10.0.0.1\"") + + # add route entry -- multiple nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 3.3.3.0/24 10.0.0.1\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 3.3.3.0/24 10.0.0.3\"") + + # check application database + self.pdb.wait_for_entry("ROUTE_TABLE", "2.2.2.0/24") + self.pdb.wait_for_entry("ROUTE_TABLE", "3.3.3.0/24") + + # check neighbor got resolved and removed from NEIGH_RESOLVE_TABLE + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet0:10.0.0.1") + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet4:10.0.0.3") + + # check ASIC route database + self.check_route_entries(["2.2.2.0/24", "3.3.3.0/24"]) + + # remove route entry + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 2.2.2.0/24 10.0.0.1\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 3.3.3.0/24 10.0.0.1\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 3.3.3.0/24 10.0.0.3\"") + + # check application database + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "2.2.2.0/24") + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "3.3.3.0/24") + + # check ASIC route database + self.check_deleted_route_entries(["2.2.2.0/24", "3.3.3.0/24"]) + + # remove ip address + self.remove_ip_address("Ethernet0", "10.0.0.0/31") + self.remove_ip_address("Ethernet4", "10.0.0.2/31") + + # remove l3 interface + self.remove_l3_intf("Ethernet0") + self.remove_l3_intf("Ethernet4") + + self.set_admin_status("Ethernet0", "down") + self.set_admin_status("Ethernet4", "down") + + # remove ip address and default route + dvs.servers[0].runcmd("ip route del default dev eth0") + dvs.servers[0].runcmd("ip address del 10.0.0.1/31 dev eth0") + + dvs.servers[1].runcmd("ip route del default dev eth0") + dvs.servers[1].runcmd("ip address del 10.0.0.3/31 dev eth0") + + def test_RouteAddRemoveIpv6RouteResolveNeigh(self, dvs, testlog): + self.setup_db(dvs) + + # create l3 interface + self.create_l3_intf("Ethernet0", "") + self.create_l3_intf("Ethernet4", "") + + # bring up interface + self.set_admin_status("Ethernet0", "up") + self.set_admin_status("Ethernet4", "up") + + # set ip address + self.add_ip_address("Ethernet0", "2000::1/64") + self.add_ip_address("Ethernet4", "2001::1/64") + dvs.runcmd("sysctl -w net.ipv6.conf.all.forwarding=1") + + # set ip address and default route + dvs.servers[0].runcmd("ip -6 address add 2000::2/64 dev eth0") + dvs.servers[0].runcmd("ip -6 route add default via 2000::1") + + dvs.servers[1].runcmd("ip -6 address add 2001::2/64 dev eth0") + dvs.servers[1].runcmd("ip -6 route add default via 2001::1") + time.sleep(2) + + # add route entry -- single nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ipv6 route 3000::0/64 2000::2\"") + + # add route entry -- multiple nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ipv6 route 4000::0/64 2000::2\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ipv6 route 4000::0/64 2001::2\"") + + # check application database + self.pdb.wait_for_entry("ROUTE_TABLE", "3000::/64") + self.pdb.wait_for_entry("ROUTE_TABLE", "4000::/64") + + # check neighbor got resolved and removed from NEIGH_RESOLVE_TABLE + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet0:2000::2") + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet4:2001::2") + + # check ASIC route database + self.check_route_entries(["3000::/64", "4000::/64"]) + + # remove route entry + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ipv6 route 3000::0/64 2000::2\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ipv6 route 4000::0/64 2000::2\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ipv6 route 4000::0/64 2001::2\"") + + # check application database + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "3000::/64") + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "4000::/64") + + # check ASIC route database + self.check_deleted_route_entries(["3000::/64", "4000::/64"]) + + # remove ip address + self.remove_ip_address("Ethernet0", "2000::1/64") + self.remove_ip_address("Ethernet4", "2001::1/64") + + # remove l3 interface + self.remove_l3_intf("Ethernet0") + self.remove_l3_intf("Ethernet4") + + self.set_admin_status("Ethernet0", "down") + self.set_admin_status("Ethernet4", "down") + + # remove ip address and default route + dvs.servers[0].runcmd("ip -6 route del default dev eth0") + dvs.servers[0].runcmd("ip -6 address del 2000::2/64 dev eth0") + + dvs.servers[1].runcmd("ip -6 route del default dev eth0") + dvs.servers[1].runcmd("ip -6 address del 2001::2/64 dev eth0") + + def test_RouteAddRemoveIpv4RouteUnresolvedNeigh(self, dvs, testlog): + self.setup_db(dvs) + + self.clear_srv_config(dvs) + + # create l3 interface + self.create_l3_intf("Ethernet0", "") + self.create_l3_intf("Ethernet4", "") + + # set ip address + self.add_ip_address("Ethernet0", "10.0.0.0/31") + self.add_ip_address("Ethernet4", "10.0.0.2/31") + + # bring up interface + self.set_admin_status("Ethernet0", "up") + self.set_admin_status("Ethernet4", "up") + + # add route entry -- single nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 2.2.2.0/24 10.0.0.1\"") + + # add route entry -- multiple nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 3.3.3.0/24 10.0.0.1\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ip route 3.3.3.0/24 10.0.0.3\"") + + # check application database + self.pdb.wait_for_entry("ROUTE_TABLE", "2.2.2.0/24") + self.pdb.wait_for_entry("ROUTE_TABLE", "3.3.3.0/24") + + # check for unresolved neighbor entries + self.pdb.wait_for_entry("NEIGH_RESOLVE_TABLE", "Ethernet0:10.0.0.1") + self.pdb.wait_for_entry("NEIGH_RESOLVE_TABLE", "Ethernet4:10.0.0.3") + + # check routes does not show up in ASIC_DB + self.check_deleted_route_entries(["2.2.2.0/24", "3.3.3.0/24"]) + + # set ip address and default route + dvs.servers[0].runcmd("ip address add 10.0.0.1/31 dev eth0") + dvs.servers[0].runcmd("ip route add default via 10.0.0.0") + + dvs.servers[1].runcmd("ip address add 10.0.0.3/31 dev eth0") + dvs.servers[1].runcmd("ip route add default via 10.0.0.2") + time.sleep(2) + + # check application database + self.pdb.wait_for_entry("ROUTE_TABLE", "2.2.2.0/24") + self.pdb.wait_for_entry("ROUTE_TABLE", "3.3.3.0/24") + + # check neighbor got resolved and removed from NEIGH_RESOLVE_TABLE + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet0:10.0.0.1") + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet4:10.0.0.3") + + # check ASIC route database + self.check_route_entries(["2.2.2.0/24", "3.3.3.0/24"]) + + # remove route entry + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 2.2.2.0/24 10.0.0.1\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 3.3.3.0/24 10.0.0.1\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ip route 3.3.3.0/24 10.0.0.3\"") + + # check application database + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "2.2.2.0/24") + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "3.3.3.0/24") + + # check ASIC route database + self.check_deleted_route_entries(["2.2.2.0/24", "3.3.3.0/24"]) + + # remove ip address + self.remove_ip_address("Ethernet0", "10.0.0.0/31") + self.remove_ip_address("Ethernet4", "10.0.0.2/31") + + # remove l3 interface + self.remove_l3_intf("Ethernet0") + self.remove_l3_intf("Ethernet4") + + self.set_admin_status("Ethernet0", "down") + self.set_admin_status("Ethernet4", "down") + + # remove ip address and default route + dvs.servers[0].runcmd("ip route del default dev eth0") + dvs.servers[0].runcmd("ip address del 10.0.0.1/31 dev eth0") + + dvs.servers[1].runcmd("ip route del default dev eth0") + dvs.servers[1].runcmd("ip address del 10.0.0.3/31 dev eth0") + + def test_RouteAddRemoveIpv6RouteUnresolvedNeigh(self, dvs, testlog): + self.setup_db(dvs) + + # create l3 interface + self.create_l3_intf("Ethernet0", "") + self.create_l3_intf("Ethernet4", "") + + # bring up interface + self.set_admin_status("Ethernet0", "up") + self.set_admin_status("Ethernet4", "up") + + # set ip address + self.add_ip_address("Ethernet0", "2000::1/64") + self.add_ip_address("Ethernet4", "2001::1/64") + dvs.runcmd("sysctl -w net.ipv6.conf.all.forwarding=1") + + # add route entry -- single nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ipv6 route 3000::0/64 2000::2\"") + + # add route entry -- multiple nexthop + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ipv6 route 4000::0/64 2000::2\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"ipv6 route 4000::0/64 2001::2\"") + + # check application database + self.pdb.wait_for_entry("ROUTE_TABLE", "3000::/64") + self.pdb.wait_for_entry("ROUTE_TABLE", "4000::/64") + + # check for unresolved neighbor entries + self.pdb.wait_for_entry("NEIGH_RESOLVE_TABLE", "Ethernet0:2000::2") + self.pdb.wait_for_entry("NEIGH_RESOLVE_TABLE", "Ethernet4:2001::2") + + # check routes does not show up in ASIC_DB + self.check_deleted_route_entries(["3000::/64", "4000::/64"]) + + # set ip address and default route + dvs.servers[0].runcmd("ip -6 address add 2000::2/64 dev eth0") + dvs.servers[0].runcmd("ip -6 route add default via 2000::1") + + dvs.servers[1].runcmd("ip -6 address add 2001::2/64 dev eth0") + dvs.servers[1].runcmd("ip -6 route add default via 2001::1") + time.sleep(5) + + dvs.servers[0].runcmd("ping -6 -c 1 2001::2") + + # check application database + self.pdb.wait_for_entry("ROUTE_TABLE", "3000::/64") + self.pdb.wait_for_entry("ROUTE_TABLE", "4000::/64") + + # check neighbor got resolved and removed from NEIGH_RESOLVE_TABLE + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet0:2000::2") + self.pdb.wait_for_deleted_entry("NEIGH_RESOLVE_TABLE", "Ethernet4:2001::2") + + # check ASIC route database + self.check_route_entries(["3000::/64", "4000::/64"]) + + # remove route entry + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ipv6 route 3000::0/64 2000::2\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ipv6 route 4000::0/64 2000::2\"") + dvs.runcmd("vtysh -c \"configure terminal\" -c \"no ipv6 route 4000::0/64 2001::2\"") + + # check application database + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "3000::/64") + self.pdb.wait_for_deleted_entry("ROUTE_TABLE", "4000::/64") + + # check ASIC route database + self.check_deleted_route_entries(["3000::/64", "4000::/64"]) + + # remove ip address + self.remove_ip_address("Ethernet0", "2000::1/64") + self.remove_ip_address("Ethernet4", "2001::1/64") + + # remove l3 interface + self.remove_l3_intf("Ethernet0") + self.remove_l3_intf("Ethernet4") + + self.set_admin_status("Ethernet0", "down") + self.set_admin_status("Ethernet4", "down") + + # remove ip address and default route + dvs.servers[0].runcmd("ip -6 route del default dev eth0") + dvs.servers[0].runcmd("ip -6 address del 2000::2/64 dev eth0") + + dvs.servers[1].runcmd("ip -6 route del default dev eth0") + dvs.servers[1].runcmd("ip -6 address del 2001::2/64 dev eth0") + def test_RouteAddRemoveIpv4RouteWithVrf(self, dvs, testlog): self.setup_db(dvs) diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index c7d6f5a67315..36028dfc6985 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -869,11 +869,13 @@ def test_OrchagentWarmRestartReadyCheck(self, dvs, testlog): appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0) ps = swsscommon.ProducerStateTable(appl_db, swsscommon.APP_ROUTE_TABLE_NAME) fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.1"), ("ifname", "Ethernet0")]) - ps.set("2.2.2.0/24", fvs) + fvs = swsscommon.FieldValuePairs([("nexthop","20.0.0.1"), ("ifname", "Ethernet0")]) + ps.set("3.3.3.0/24", fvs) + time.sleep(1) - # Should fail, since neighbor for next 10.0.0.1 has not been not resolved yet + # Should fail, since neighbor for next 20.0.0.1 has not been not resolved yet (exitcode, result) = dvs.runcmd("/usr/bin/orchagent_restart_check") assert result == "RESTARTCHECK failed\n" @@ -882,8 +884,8 @@ def test_OrchagentWarmRestartReadyCheck(self, dvs, testlog): (exitcode, result) = dvs.runcmd("/usr/bin/orchagent_restart_check -n -s -w 500") assert result == "RESTARTCHECK succeeded\n" - # get neighbor and arp entry - dvs.servers[1].runcmd("ping -c 1 10.0.0.1") + # Remove unfinished routes + ps._del("3.3.3.0/24") time.sleep(1) (exitcode, result) = dvs.runcmd("/usr/bin/orchagent_restart_check")