diff --git a/orchagent/fgnhgorch.cpp b/orchagent/fgnhgorch.cpp index 4111665e09d3..0deabdb24d83 100644 --- a/orchagent/fgnhgorch.cpp +++ b/orchagent/fgnhgorch.cpp @@ -317,6 +317,7 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout bool FgNhgOrch::removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_route_entry) { SWSS_LOG_ENTER(); + sai_status_t status; for (auto nhgm : syncd_fg_route_entry->nhopgroup_members) @@ -337,6 +338,34 @@ bool FgNhgOrch::removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_rout if (!gRouteOrch->removeFineGrainedNextHopGroup(syncd_fg_route_entry->next_hop_group_id)) { + SWSS_LOG_ERROR("Failed to remove nhgid %" PRIx64 " return failure", + syncd_fg_route_entry->next_hop_group_id); + return false; + } + + return true; +} + + +bool FgNhgOrch::modifyRoutesNextHopId(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, sai_object_id_t next_hop_id) +{ + SWSS_LOG_ENTER(); + + sai_route_entry_t route_entry; + sai_attribute_t route_attr; + + route_entry.vr_id = vrf_id; + route_entry.switch_id = gSwitchId; + copy(route_entry.destination, ipPrefix); + + route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; + route_attr.value.oid = next_hop_id; + + sai_status_t status = sai_route_api->set_route_entry_attribute(&route_entry, &route_attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to set route %s with packet action forward, %d", + ipPrefix.to_string().c_str(), status); return false; } @@ -385,22 +414,56 @@ bool FgNhgOrch::validNextHopInNextHopGroup(const NextHopKey& nexthop) return true; } - for (auto active_nh : syncd_fg_route_entry->active_nexthops) + if (fgNhgEntry->hash_bucket_indices.size() == 0 && syncd_fg_route_entry->points_to_rif) { - bank_member_changes[fgNhgEntry->next_hops[active_nh.ip_address].bank]. - active_nhs.push_back(active_nh); + /* Only happens the 1st time when hash_bucket_indices are not inited + */ + for (auto it : fgNhgEntry->next_hops) + { + while (bank_member_changes.size() <= it.second.bank) + { + bank_member_changes.push_back(BankMemberChanges()); + } + } } bank_member_changes[fgNhgEntry->next_hops[nexthop.ip_address].bank]. nhs_to_add.push_back(nexthop); nhopgroup_members_set[nexthop] = m_neighOrch->getNextHopId(nexthop); - if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, - bank_member_changes, nhopgroup_members_set, route_table.first)) + if (syncd_fg_route_entry->points_to_rif) { - SWSS_LOG_ERROR("Failed to set fine grained next hop %s", - nexthop.to_string().c_str()); - return false; + // RIF route is now neigh resolved: create Fine Grained ECMP + if (!createFineGrainedNextHopGroup(*syncd_fg_route_entry, fgNhgEntry, syncd_fg_route_entry->nhg_key)) + { + return false; + } + + if (!setNewNhgMembers(*syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, route_table.first)) + { + return false; + } + + if (!modifyRoutesNextHopId(route_tables.first, route_table.first, syncd_fg_route_entry->next_hop_group_id)) + { + return false; + } + } + else + { + for (auto active_nh : syncd_fg_route_entry->active_nexthops) + { + bank_member_changes[fgNhgEntry->next_hops[active_nh.ip_address].bank]. + active_nhs.push_back(active_nh); + } + + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + bank_member_changes, nhopgroup_members_set, route_table.first)) + { + SWSS_LOG_ERROR("Failed to set fine grained next hop %s", + nexthop.to_string().c_str()); + return false; + } } m_neighOrch->increaseNextHopRefCount(nexthop); @@ -760,14 +823,46 @@ bool FgNhgOrch::setInactiveBankToNextAvailableActiveBank(FGNextHopGroupEntry *sy if (new_bank_idx == bank_member_changes.size()) { - SWSS_LOG_NOTICE("All banks of FG next-hops are down for prefix %s", - ipPrefix.to_string().c_str()); /* Case where there are no active banks */ - /* Note: There is no way to set a NULL OID to the now inactive next-hops - * so we leave the next-hops as is in SAI, and future route/neighbor changes - * will take care of setting the next-hops to the correctly active nhs + SWSS_LOG_NOTICE("All banks of FG next-hops are down for prefix %s", + ipPrefix.to_string().c_str()); + + /* This may occur when there are no neigh entries available any more + * set route pointing to rif to allow for neigh resolution in kernel. + * If route already points to rif then we are done. */ - syncd_fg_route_entry->syncd_fgnhg_map[bank].clear(); + if (!syncd_fg_route_entry->points_to_rif) + { + std::string interface_alias = syncd_fg_route_entry->nhg_key.getNextHops().begin()->alias; + sai_object_id_t rif_next_hop_id = m_intfsOrch->getRouterIntfsId(interface_alias); + if (rif_next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get rif next hop for %s", interface_alias.c_str()); + return false; + } + if (!modifyRoutesNextHopId(gVirtualRouterId, ipPrefix, rif_next_hop_id)) + { + SWSS_LOG_ERROR("Failed to modify route nexthopid to rif"); + return false; + } + + if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry)) + { + SWSS_LOG_ERROR("Failed to delete Fine Grained next hop group"); + return false; + } + + syncd_fg_route_entry->points_to_rif = true; + syncd_fg_route_entry->next_hop_group_id = rif_next_hop_id; + + // remove state_db entry + m_stateWarmRestartRouteTable.del(ipPrefix.to_string()); + // Clear data structures + syncd_fg_route_entry->syncd_fgnhg_map.clear(); + syncd_fg_route_entry->active_nexthops.clear(); + syncd_fg_route_entry->inactive_to_active_map.clear(); + syncd_fg_route_entry->nhopgroup_members.clear(); + } } return true; @@ -1017,6 +1112,7 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh { m_recoveryMap.erase(nexthopsMap); } + syncd_fg_route_entry.points_to_rif = false; return true; } @@ -1094,13 +1190,13 @@ bool FgNhgOrch::syncdContainsFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPre bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops, - sai_object_id_t &next_hop_id, bool &prevNhgWasFineGrained) + sai_object_id_t &next_hop_id, bool &isNextHopIdChanged) { SWSS_LOG_ENTER(); - /* default prevNhgWasFineGrained to true so that sai route is unaffected + /* default isNextHopIdChanged to false so that sai route is unaffected * when we return early with success */ - prevNhgWasFineGrained = true; + isNextHopIdChanged = false; FgNhgEntry *fgNhgEntry = 0; set next_hop_set = nextHops.getNextHops(); auto prefix_entry = m_fgNhgPrefixes.find(ipPrefix); @@ -1123,7 +1219,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const break; } } - + if (m_syncdFGRouteTables.find(vrf_id) != m_syncdFGRouteTables.end() && m_syncdFGRouteTables.at(vrf_id).find(ipPrefix) != m_syncdFGRouteTables.at(vrf_id).end() && m_syncdFGRouteTables.at(vrf_id).at(ipPrefix).nhg_key == nextHops) @@ -1150,7 +1246,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const */ for (auto it : fgNhgEntry->next_hops) { - while(bank_member_changes.size() <= it.second.bank) + while (bank_member_changes.size() <= it.second.bank) { bank_member_changes.push_back(BankMemberChanges()); } @@ -1202,6 +1298,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const { bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank]. nhs_to_add.push_back(nhk); + next_hop_to_add = true; } } @@ -1211,54 +1308,81 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const if (syncd_fg_route_entry_it != m_syncdFGRouteTables.at(vrf_id).end()) { + /* Route exists and nh was associated in the past */ FGNextHopGroupEntry *syncd_fg_route_entry = &(syncd_fg_route_entry_it->second); - /* Route exists, update FG ECMP group in SAI */ - for (auto nhk : syncd_fg_route_entry->active_nexthops) + + if (syncd_fg_route_entry->points_to_rif) { - if (nhopgroup_members_set.find(nhk) == nhopgroup_members_set.end()) + if (next_hop_to_add) { - bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank]. - nhs_to_del.push_back(nhk); + isNextHopIdChanged = true; + if (!createFineGrainedNextHopGroup(*syncd_fg_route_entry, fgNhgEntry, nextHops)) + { + return false; + } + + if (!setNewNhgMembers(*syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + return false; + } } - else + } + else + { + /* Update FG ECMP group in SAI */ + for (auto nhk : syncd_fg_route_entry->active_nexthops) { - bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank]. - active_nhs.push_back(nhk); + if (nhopgroup_members_set.find(nhk) == nhopgroup_members_set.end()) + { + bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank]. + nhs_to_del.push_back(nhk); + } + else + { + bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank]. + active_nhs.push_back(nhk); + } } - } - if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, - nhopgroup_members_set, ipPrefix)) - { - return false; + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, + nhopgroup_members_set, ipPrefix)) + { + return false; + } } } else { /* New route + nhg addition */ - prevNhgWasFineGrained = false; - if (next_hop_to_add == false) - { - SWSS_LOG_INFO("There were no valid next-hops to add %s:%s", ipPrefix.to_string().c_str(), - nextHops.to_string().c_str()); - /* Let the route retry logic(upon false rc) take care of this case */ - return false; - } - + isNextHopIdChanged = true; FGNextHopGroupEntry syncd_fg_route_entry; - if (!createFineGrainedNextHopGroup(syncd_fg_route_entry, fgNhgEntry, nextHops)) + if (next_hop_to_add) { - return false; - } + if (!createFineGrainedNextHopGroup(syncd_fg_route_entry, fgNhgEntry, nextHops)) + { + return false; + } - if (!setNewNhgMembers(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix)) + if (!setNewNhgMembers(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + return false; + } + } + else { - return false; + sai_object_id_t rif_next_hop_id = m_intfsOrch->getRouterIntfsId(next_hop_set.begin()->alias); + if (rif_next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get rif next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } + + syncd_fg_route_entry.next_hop_group_id = rif_next_hop_id; + syncd_fg_route_entry.points_to_rif = true; } m_syncdFGRouteTables[vrf_id][ipPrefix] = syncd_fg_route_entry; - - SWSS_LOG_NOTICE("Created route %s:%s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } m_syncdFGRouteTables[vrf_id][ipPrefix].nhg_key = nextHops; @@ -1310,19 +1434,22 @@ bool FgNhgOrch::removeFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) } FGNextHopGroupEntry *syncd_fg_route_entry = &(it_route->second); - if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry)) + if (!syncd_fg_route_entry->points_to_rif) { - SWSS_LOG_ERROR("Failed to clean-up fine grained ECMP SAI group"); - return false; - } + if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry)) + { + SWSS_LOG_ERROR("Failed to clean-up fine grained ECMP SAI group"); + return false; + } - for (auto nh : syncd_fg_route_entry->active_nexthops) - { - m_neighOrch->decreaseNextHopRefCount(nh); - } + for (auto nh : syncd_fg_route_entry->active_nexthops) + { + m_neighOrch->decreaseNextHopRefCount(nh); + } - // remove state_db entry - m_stateWarmRestartRouteTable.del(ipPrefix.to_string()); + // remove state_db entry + m_stateWarmRestartRouteTable.del(ipPrefix.to_string()); + } it_route_table->second.erase(it_route); if (it_route_table->second.size() == 0) diff --git a/orchagent/fgnhgorch.h b/orchagent/fgnhgorch.h index 42b2b7f615c8..b7ec844a5906 100644 --- a/orchagent/fgnhgorch.h +++ b/orchagent/fgnhgorch.h @@ -30,6 +30,7 @@ struct FGNextHopGroupEntry BankFGNextHopGroupMap syncd_fgnhg_map; // Map of (bank) -> (nexthops) -> (index in nhopgroup_members) NextHopGroupKey nhg_key; // Full next hop group key InactiveBankMapsToBank inactive_to_active_map; // Maps an inactive bank to an active one in terms of hash bkts + bool points_to_rif; // Flag to identify that route is currently pointing to a rif }; struct FGNextHopInfo @@ -104,7 +105,7 @@ class FgNhgOrch : public Orch, public Observer bool syncdContainsFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix); bool validNextHopInNextHopGroup(const NextHopKey&); bool invalidNextHopInNextHopGroup(const NextHopKey&); - bool setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops, sai_object_id_t &next_hop_id, bool &prevNhgWasFineGrained); + bool setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops, sai_object_id_t &next_hop_id, bool &isNextHopIdChanged); bool removeFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix); // warm reboot support @@ -150,10 +151,10 @@ class FgNhgOrch : public Orch, public Observer void setStateDbRouteEntry(const IpPrefix&, uint32_t index, NextHopKey nextHop); bool writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid, const IpPrefix &ipPrefix, NextHopKey nextHop); + bool modifyRoutesNextHopId(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, sai_object_id_t next_hop_id); bool createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, const NextHopGroupKey &nextHops); bool removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_route_entry); - vector generateRouteTableFromNhgKey(NextHopGroupKey nhg); void cleanupIpInLinkToIpMap(const string &link, const IpAddress &ip, FgNhgEntry &fgNhg_entry); diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index 04907b3de911..d54e205dedf4 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -1411,7 +1411,7 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) bool overlay_nh = false; bool status = false; bool curNhgIsFineGrained = false; - bool prevNhgWasFineGrained = false; + bool isFineGrainedNextHopIdChanged = false; bool blackhole = false; if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end()) @@ -1434,9 +1434,9 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) /* We get 3 return values from setFgNhg: * 1. success/failure: on addition/modification of nexthop group/members * 2. next_hop_id: passed as a param to fn, used for sai route creation - * 3. prevNhgWasFineGrained: passed as a param to fn, used to determine transitions + * 3. isFineGrainedNextHopIdChanged: passed as a param to fn, used to determine transitions * between regular and FG ECMP, this is an optimization to prevent multiple lookups */ - if (!m_fgNhgOrch->setFgNhg(vrf_id, ipPrefix, nextHops, next_hop_id, prevNhgWasFineGrained)) + if (!m_fgNhgOrch->setFgNhg(vrf_id, ipPrefix, nextHops, next_hop_id, isFineGrainedNextHopIdChanged)) { return false; } @@ -1627,7 +1627,7 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &route_attr); } - if (curNhgIsFineGrained && prevNhgWasFineGrained) + if (curNhgIsFineGrained && !isFineGrainedNextHopIdChanged) { /* Don't change route entry if the route is previously fine grained and new nhg is also fine grained. * We already modifed sai nhg objs as part of setFgNhg to account for nhg change. */ diff --git a/tests/test_fgnhg.py b/tests/test_fgnhg.py index 9e465935ada1..2fa8a9d89090 100644 --- a/tests/test_fgnhg.py +++ b/tests/test_fgnhg.py @@ -20,6 +20,7 @@ ASIC_NHG = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP" ASIC_NHG_MEMB = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER" ASIC_NH_TB = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP" +ASIC_RIF = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTER_INTERFACE" def create_entry(db, table, key, pairs): db.create_entry(table, key, pairs) @@ -79,6 +80,33 @@ def _access_function(): failure_message="Fine Grained ECMP route not found") return result +def validate_asic_nhg_router_interface(asic_db, ipprefix): + def _access_function(): + false_ret = (False, '') + keys = asic_db.get_keys(ASIC_ROUTE_TB) + key = '' + route_exists = False + for k in keys: + rt_key = json.loads(k) + if rt_key['dest'] == ipprefix: + route_exists = True + key = k + if not route_exists: + return false_ret + + fvs = asic_db.get_entry(ASIC_ROUTE_TB, key) + if not fvs: + return false_ret + + rifid = fvs.get("SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID") + fvs = asic_db.get_entry(ASIC_RIF, rifid) + if not fvs: + return false_ret + + return (True, rifid) + _, result = wait_for_result(_access_function, failure_message="Route pointing to RIF not found") + return result + def validate_asic_nhg_regular_ecmp(asic_db, ipprefix): def _access_function(): false_ret = (False, '') @@ -338,8 +366,9 @@ def fine_grained_ecmp_base_test(dvs, match_mode): found_route = True break - # Since we didn't populate ARP yet, the route shouldn't be programmed - assert (found_route == False) + # Since we didn't populate ARP yet, route should point to RIF for kernel arp resolution to occur + assert (found_route == True) + validate_asic_nhg_router_interface(asic_db, fg_nhg_prefix) asic_nh_count = len(asic_db.get_keys(ASIC_NH_TB)) dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01") @@ -484,6 +513,9 @@ def fine_grained_ecmp_base_test(dvs, match_mode): # bring all links up one by one startup_link(dvs, app_db, 3) startup_link(dvs, app_db, 4) + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + nh_oid_map = get_nh_oid_map(asic_db) nh_memb_exp_count = {"10.0.0.7":30,"10.0.0.9":30} validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)