diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 2a266b7e7b38c..02c2d20dc6732 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -46,7 +46,7 @@ Resources ========= The ``netdevsim`` driver exposes resources to control the number of FIB -entries and FIB rule entries that the driver will allow. +entries, FIB rule entries and nexthops that the driver will allow. .. code:: shell @@ -54,6 +54,7 @@ entries and FIB rule entries that the driver will allow. $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16 $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64 $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16 + $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16 $ devlink dev reload netdevsim/netdevsim0 Driver-specific Traps diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d070614176755..49cc1fea9e02b 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -324,6 +324,12 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } + /* Resources for nexthops */ + err = devlink_resource_register(devlink, "nexthops", (u64)-1, + NSIM_RESOURCE_NEXTHOPS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + ¶ms); + out: return err; } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index deea17a0e79c9..45d8a7790bd5b 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "netdevsim.h" @@ -42,9 +43,12 @@ struct nsim_fib_data { struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; + struct nsim_fib_entry nexthops; struct rhashtable fib_rt_ht; struct list_head fib_rt_list; spinlock_t fib_lock; /* Protects hashtable, list and accounting */ + struct notifier_block nexthop_nb; + struct rhashtable nexthop_ht; struct devlink *devlink; }; @@ -86,6 +90,19 @@ static const struct rhashtable_params nsim_fib_rt_ht_params = { .automatic_shrinking = true, }; +struct nsim_nexthop { + struct rhash_head ht_node; + u64 occ; + u32 id; +}; + +static const struct rhashtable_params nsim_nexthop_ht_params = { + .key_offset = offsetof(struct nsim_nexthop, id), + .head_offset = offsetof(struct nsim_nexthop, ht_node), + .key_len = sizeof(u32), + .automatic_shrinking = true, +}; + u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max) { @@ -104,6 +121,9 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: return 0; } @@ -129,6 +149,9 @@ static void nsim_fib_set_max(struct nsim_fib_data *fib_data, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: WARN_ON(1); return; @@ -389,11 +412,6 @@ static int nsim_fib4_event(struct nsim_fib_data *data, fen_info = container_of(info, struct fib_entry_notifier_info, info); - if (fen_info->fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return 0; - } - switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib4_rt_insert(data, fen_info); @@ -704,11 +722,6 @@ static int nsim_fib6_event(struct nsim_fib_data *data, fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - if (fen6_info->rt->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); - return 0; - } - if (fen6_info->rt->fib6_src.plen) { NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported"); return 0; @@ -838,6 +851,196 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) data->ipv6.rules.num = 0ULL; } +static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + u64 occ = 0; + int i; + + nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); + if (!nexthop) + return NULL; + + nexthop->id = info->id; + + /* Determine the number of nexthop entries the new nexthop will + * occupy. + */ + + if (!info->is_grp) { + occ = 1; + goto out; + } + + for (i = 0; i < info->nh_grp->num_nh; i++) + occ += info->nh_grp->nh_entries[i].weight; + +out: + nexthop->occ = occ; + return nexthop; +} + +static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop) +{ + kfree(nexthop); +} + +static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ, + bool add, struct netlink_ext_ack *extack) +{ + int err = 0; + + if (add) { + if (data->nexthops.num + occ <= data->nexthops.max) { + data->nexthops.num += occ; + } else { + err = -ENOSPC; + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); + } + } else { + if (WARN_ON(occ > data->nexthops.num)) + return -EINVAL; + data->nexthops.num -= occ; + } + + return err; +} + +static int nsim_nexthop_add(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_insert_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to insert nexthop"); + goto err_nexthop_dismiss; + } + + nexthop_set_hw_flags(net, nexthop->id, false, true); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_replace(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct nsim_nexthop *nexthop_old, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_replace_fast(&data->nexthop_ht, + &nexthop_old->ht_node, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace nexthop"); + goto err_nexthop_dismiss; + } + + nexthop_set_hw_flags(net, nexthop->id, false, true); + nsim_nexthop_account(data, nexthop_old->occ, false, extack); + nsim_nexthop_destroy(nexthop_old); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_insert(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop, *nexthop_old; + int err; + + nexthop = nsim_nexthop_create(data, info); + if (!nexthop) + return -ENOMEM; + + nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop_old) + err = nsim_nexthop_add(data, nexthop, info->extack); + else + err = nsim_nexthop_replace(data, nexthop, nexthop_old, + info->extack); + + if (err) + nsim_nexthop_destroy(nexthop); + + return err; +} + +static void nsim_nexthop_remove(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + + nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop) + return; + + rhashtable_remove_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + nsim_nexthop_account(data, nexthop->occ, false, info->extack); + nsim_nexthop_destroy(nexthop); +} + +static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + nexthop_nb); + struct nh_notifier_info *info = ptr; + int err = 0; + + ASSERT_RTNL(); + + switch (event) { + case NEXTHOP_EVENT_REPLACE: + err = nsim_nexthop_insert(data, info); + break; + case NEXTHOP_EVENT_DEL: + nsim_nexthop_remove(data, info); + break; + default: + break; + } + + return notifier_from_errno(err); +} + +static void nsim_nexthop_free(void *ptr, void *arg) +{ + struct nsim_nexthop *nexthop = ptr; + struct nsim_fib_data *data = arg; + struct net *net; + + net = devlink_net(data->devlink); + nexthop_set_hw_flags(net, nexthop->id, false, false); + nsim_nexthop_account(data, nexthop->occ, false, NULL); + nsim_nexthop_destroy(nexthop); +} + static u64 nsim_fib_ipv4_resource_occ_get(void *priv) { struct nsim_fib_data *data = priv; @@ -866,12 +1069,20 @@ static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); } +static u64 nsim_fib_nexthops_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_NEXTHOPS, false); +} + static void nsim_fib_set_max_all(struct nsim_fib_data *data, struct devlink *devlink) { enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, }; int i; @@ -897,20 +1108,32 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, return ERR_PTR(-ENOMEM); data->devlink = devlink; + err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); + if (err) + goto err_data_free; + spin_lock_init(&data->fib_lock); INIT_LIST_HEAD(&data->fib_rt_list); err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); if (err) - goto err_data_free; + goto err_rhashtable_nexthop_destroy; nsim_fib_set_max_all(data, devlink); + data->nexthop_nb.notifier_call = nsim_nexthop_event_nb; + err = register_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb, + extack); + if (err) { + pr_err("Failed to register nexthop notifier\n"); + goto err_rhashtable_fib_destroy; + } + data->fib_nb.notifier_call = nsim_fib_event_nb; err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, nsim_fib_dump_inconsistent, extack); if (err) { pr_err("Failed to register fib notifier\n"); - goto err_rhashtable_destroy; + goto err_nexthop_nb_unregister; } devlink_resource_occ_get_register(devlink, @@ -929,11 +1152,20 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_fib_ipv6_rules_res_occ_get, data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_NEXTHOPS, + nsim_fib_nexthops_res_occ_get, + data); return data; -err_rhashtable_destroy: +err_nexthop_nb_unregister: + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); +err_rhashtable_fib_destroy: rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); +err_rhashtable_nexthop_destroy: + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); err_data_free: kfree(data); return ERR_PTR(err); @@ -941,6 +1173,8 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) { + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_NEXTHOPS); devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV6_FIB_RULES); devlink_resource_occ_get_unregister(devlink, @@ -950,8 +1184,11 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); kfree(data); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 827fc80f50a07..698be048041b9 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -158,6 +158,7 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, }; struct nsim_dev_health { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 876679af6f7cf..183e708dc6cb3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4684,9 +4684,14 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh) static int vxlan_nexthop_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct nexthop *nh = ptr; + struct nh_notifier_info *info = ptr; + struct nexthop *nh; + + if (event != NEXTHOP_EVENT_DEL) + return NOTIFY_DONE; - if (!nh || event != NEXTHOP_EVENT_DEL) + nh = nexthop_find_by_id(info->net, info->id); + if (!nh) return NOTIFY_DONE; vxlan_fdb_nh_flush(nh); @@ -4706,7 +4711,8 @@ static __net_init int vxlan_init_net(struct net *net) for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vn->sock_list[h]); - return register_nexthop_notifier(net, &vn->nexthop_notifier_block); + return register_nexthop_notifier(net, &vn->nexthop_notifier_block, + NULL); } static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 2fd76a9b6dc8b..226930d66b637 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -105,11 +105,49 @@ struct nexthop { }; enum nexthop_event_type { - NEXTHOP_EVENT_DEL + NEXTHOP_EVENT_DEL, + NEXTHOP_EVENT_REPLACE, }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb); +struct nh_notifier_single_info { + struct net_device *dev; + u8 gw_family; + union { + __be32 ipv4; + struct in6_addr ipv6; + }; + u8 is_reject:1, + is_fdb:1, + has_encap:1; +}; + +struct nh_notifier_grp_entry_info { + u8 weight; + u32 id; + struct nh_notifier_single_info nh; +}; + +struct nh_notifier_grp_info { + u16 num_nh; + bool is_fdb; + struct nh_notifier_grp_entry_info nh_entries[]; +}; + +struct nh_notifier_info { + struct net *net; + struct netlink_ext_ack *extack; + u32 id; + bool is_grp; + union { + struct nh_notifier_single_info *nh; + struct nh_notifier_grp_info *nh_grp; + }; +}; + +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d1325ffb00608..2ffbef5da6c1f 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -396,11 +396,13 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ -#define RTNH_F_OFFLOAD 8 /* offloaded route */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ #define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ #define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ -#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD) +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1f75dc686b6b6..f70b9a0c49579 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1644,6 +1644,8 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); if (nhc->nhc_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; + if (nhc->nhc_flags & RTNH_F_TRAP) + *flags |= RTNH_F_TRAP; if (!skip_oif && nhc->nhc_dev && nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ffc5332f13906..28117c05dc353 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2100,15 +2100,6 @@ static void __fib_info_notify_update(struct net *net, struct fib_table *tb, rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa, KEYLENGTH - fa->fa_slen, tb->tb_id, info, NLM_F_REPLACE); - - /* call_fib_entry_notifiers will be removed when - * in-kernel notifier is implemented and supported - * for nexthop objects - */ - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - n->key, - KEYLENGTH - fa->fa_slen, fa, - NULL); } } } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0dc43ad28eb95..5e1b22d4f939f 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -36,14 +36,145 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_FDB] = { .type = NLA_FLAG }, }; +static bool nexthop_notifiers_is_empty(struct net *net) +{ + return !net->nexthop.notifier_chain.head; +} + +static void +__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, + const struct nexthop *nh) +{ + struct nh_info *nhi = rtnl_dereference(nh->nh_info); + + nh_info->dev = nhi->fib_nhc.nhc_dev; + nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; + if (nh_info->gw_family == AF_INET) + nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; + else if (nh_info->gw_family == AF_INET6) + nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; + + nh_info->is_reject = nhi->reject_nh; + nh_info->is_fdb = nhi->fdb_nh; + nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; +} + +static int nh_notifier_single_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); + if (!info->nh) + return -ENOMEM; + + __nh_notifier_single_info_init(info->nh, nh); + + return 0; +} + +static void nh_notifier_single_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh); +} + +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + u16 num_nh = nhg->num_nh; + int i; + + info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), + GFP_KERNEL); + if (!info->nh_grp) + return -ENOMEM; + + info->nh_grp->num_nh = num_nh; + info->nh_grp->is_fdb = nhg->fdb_nh; + + for (i = 0; i < num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + info->nh_grp->nh_entries[i].id = nhge->nh->id; + info->nh_grp->nh_entries[i].weight = nhge->weight; + __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, + nhge->nh); + } + + return 0; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh_grp); +} + +static int nh_notifier_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->id = nh->id; + info->is_grp = nh->is_group; + + if (info->is_grp) + return nh_notifier_grp_info_init(info, nh); + else + return nh_notifier_single_info_init(info, nh); +} + +static void nh_notifier_info_fini(struct nh_notifier_info *info) +{ + if (info->is_grp) + nh_notifier_grp_info_fini(info); + else + nh_notifier_single_info_fini(info); +} + static int call_nexthop_notifiers(struct net *net, enum nexthop_event_type event_type, - struct nexthop *nh) + struct nexthop *nh, + struct netlink_ext_ack *extack) { + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; int err; + ASSERT_RTNL(); + + if (nexthop_notifiers_is_empty(net)) + return 0; + + err = nh_notifier_info_init(&info, nh); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); + return err; + } + err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, - event_type, nh); + event_type, &info); + nh_notifier_info_fini(&info); + + return notifier_to_errno(err); +} + +static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, + enum nexthop_event_type event_type, + struct nexthop *nh, + struct netlink_ext_ack *extack) +{ + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; + int err; + + err = nh_notifier_info_init(&info, nh); + if (err) + return err; + + err = nb->notifier_call(nb, event_type, &info); + nh_notifier_info_fini(&info); + return notifier_to_errno(err); } @@ -782,9 +913,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; + struct netlink_ext_ack extack; struct nexthop *nh = nhge->nh; struct nh_group *nhg, *newg; - int i, j; + int i, j, err; WARN_ON(!nh); @@ -832,6 +964,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, list_del(&nhge->nh_list); nexthop_put(nhge->nh); + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); + if (err) + pr_err("%s\n", extack._msg); + if (nlinfo) nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); } @@ -907,7 +1043,7 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh, static void remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { - call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); /* remove from the tree */ rb_erase(&nh->rb_node, &net->nexthop.rb_root); @@ -940,13 +1076,17 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, struct netlink_ext_ack *extack) { struct nh_group *oldg, *newg; - int i; + int i, err; if (!new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + oldg = rtnl_dereference(old->nh_grp); newg = rtnl_dereference(new->nh_grp); @@ -985,31 +1125,54 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { + u8 old_protocol, old_nh_flags; struct nh_info *oldi, *newi; + struct nh_grp_entry *nhge; + int err; if (new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + + /* Hardware flags were set on 'old' as 'new' is not in the red-black + * tree. Therefore, inherit the flags from 'old' to 'new'. + */ + new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); + oldi = rtnl_dereference(old->nh_info); newi = rtnl_dereference(new->nh_info); newi->nh_parent = old; oldi->nh_parent = new; + old_protocol = old->protocol; + old_nh_flags = old->nh_flags; + old->protocol = new->protocol; old->nh_flags = new->nh_flags; rcu_assign_pointer(old->nh_info, newi); rcu_assign_pointer(new->nh_info, oldi); + /* Send a replace notification for all the groups using the nexthop. */ + list_for_each_entry(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, + extack); + if (err) + goto err_notify; + } + /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially * update IPv4 indication in all the groups using the nexthop. */ if (oldi->family == AF_INET && newi->family == AF_INET6) { - struct nh_grp_entry *nhge; - list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; struct nh_group *nhg; @@ -1020,6 +1183,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, } return 0; + +err_notify: + rcu_assign_pointer(new->nh_info, newi); + rcu_assign_pointer(old->nh_info, oldi); + old->nh_flags = old_nh_flags; + old->protocol = old_protocol; + oldi->nh_parent = old; + newi->nh_parent = new; + list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack); + } + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); + return err; } static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, @@ -1168,7 +1346,11 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); - rc = 0; + + rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); + if (rc) + rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); + out: if (!rc) { nh_base_seq_inc(net); @@ -1957,10 +2139,40 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +static int nexthops_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct rb_root *root = &net->nexthop.rb_root; + struct rb_node *node; + int err = 0; + + for (node = rb_first(root); node; node = rb_next(node)) { + struct nexthop *nh; + + nh = rb_entry(node, struct nexthop, rb_node); + err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, + extack); + if (err) + break; + } + + return err; +} + +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return blocking_notifier_chain_register(&net->nexthop.notifier_chain, - nb); + int err; + + rtnl_lock(); + err = nexthops_dump(net, nb, extack); + if (err) + goto unlock; + err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, + nb); +unlock: + rtnl_unlock(); + return err; } EXPORT_SYMBOL(register_nexthop_notifier); @@ -1971,6 +2183,27 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) } EXPORT_SYMBOL(unregister_nexthop_notifier); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) +{ + struct nexthop *nexthop; + + rcu_read_lock(); + + nexthop = nexthop_find_by_id(net, id); + if (!nexthop) + goto out; + + nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); + if (offload) + nexthop->nh_flags |= RTNH_F_OFFLOAD; + if (trap) + nexthop->nh_flags |= RTNH_F_TRAP; + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(nexthop_set_hw_flags); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7e0ce7af82346..f91a689edafd2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6039,11 +6039,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, struct sk_buff *skb; int err = -ENOBUFS; - /* call_fib6_entry_notifiers will be removed when in-kernel notifier - * is implemented and supported for nexthop objects - */ - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL); - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 0000000000000..be0c1b5ee6b8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS