From 1c9cac65cecd4f98d8713ec0c737c38a26e5b5c2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:23 +0200 Subject: [PATCH 01/18] nexthop: Add nexthop notification data structures Add data structures that will be used for nexthop replace and delete notifications in the previously introduced nexthop notification chain. New data structures are added instead of passing the existing nexthop code structures directly for several reasons. First, the existing structures encode a lot of bookkeeping information which is irrelevant for listeners of the notification chain. Second, the existing structures can be changed without worrying about introducing regressions in listeners since they are not accessed directly by them. Third, listeners of the notification chain do not need to each parse the relatively complex nexthop code structures. They are passing the required information in a simplified way. Note that a single 'has_encap' bit is added instead of the actual encapsulation information since current listeners do not support such nexthops. Changes since RFC: * s/is_encap/has_encap/ Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 2fd76a9b6dc8b..4a17b040b5020 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -108,6 +108,41 @@ enum nexthop_event_type { NEXTHOP_EVENT_DEL }; +struct nh_notifier_single_info { + struct net_device *dev; + u8 gw_family; + union { + __be32 ipv4; + struct in6_addr ipv6; + }; + u8 is_reject:1, + is_fdb:1, + has_encap:1; +}; + +struct nh_notifier_grp_entry_info { + u8 weight; + u32 id; + struct nh_notifier_single_info nh; +}; + +struct nh_notifier_grp_info { + u16 num_nh; + bool is_fdb; + struct nh_notifier_grp_entry_info nh_entries[]; +}; + +struct nh_notifier_info { + struct net *net; + struct netlink_ext_ack *extack; + u32 id; + bool is_grp; + union { + struct nh_notifier_single_info *nh; + struct nh_notifier_grp_info *nh_grp; + }; +}; + int register_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); From 3578d53dcef152a460a2d560c95dcc4399ff04cd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:24 +0200 Subject: [PATCH 02/18] nexthop: Pass extack to nexthop notifier The next patch will add extack to the notification info. This allows listeners to veto notifications and communicate the reason to user space. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0dc43ad28eb95..f677cb12fd565 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -38,7 +38,8 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { static int call_nexthop_notifiers(struct net *net, enum nexthop_event_type event_type, - struct nexthop *nh) + struct nexthop *nh, + struct netlink_ext_ack *extack) { int err; @@ -907,7 +908,7 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh, static void remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { - call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); /* remove from the tree */ rb_erase(&nh->rb_node, &net->nexthop.rb_root); From 5ca474f23454d4231e2e879d4d0daf3f85de582c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:25 +0200 Subject: [PATCH 03/18] nexthop: Prepare new notification info Prepare the new notification information so that it could be passed to listeners in the new patch. Changes since RFC: * Add a blank line in __nh_notifier_single_info_init() Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 109 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f677cb12fd565..85a595883222c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -36,15 +36,124 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_FDB] = { .type = NLA_FLAG }, }; +static bool nexthop_notifiers_is_empty(struct net *net) +{ + return !net->nexthop.notifier_chain.head; +} + +static void +__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, + const struct nexthop *nh) +{ + struct nh_info *nhi = rtnl_dereference(nh->nh_info); + + nh_info->dev = nhi->fib_nhc.nhc_dev; + nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; + if (nh_info->gw_family == AF_INET) + nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; + else if (nh_info->gw_family == AF_INET6) + nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; + + nh_info->is_reject = nhi->reject_nh; + nh_info->is_fdb = nhi->fdb_nh; + nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; +} + +static int nh_notifier_single_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); + if (!info->nh) + return -ENOMEM; + + __nh_notifier_single_info_init(info->nh, nh); + + return 0; +} + +static void nh_notifier_single_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh); +} + +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + u16 num_nh = nhg->num_nh; + int i; + + info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), + GFP_KERNEL); + if (!info->nh_grp) + return -ENOMEM; + + info->nh_grp->num_nh = num_nh; + info->nh_grp->is_fdb = nhg->fdb_nh; + + for (i = 0; i < num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + info->nh_grp->nh_entries[i].id = nhge->nh->id; + info->nh_grp->nh_entries[i].weight = nhge->weight; + __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, + nhge->nh); + } + + return 0; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh_grp); +} + +static int nh_notifier_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->id = nh->id; + info->is_grp = nh->is_group; + + if (info->is_grp) + return nh_notifier_grp_info_init(info, nh); + else + return nh_notifier_single_info_init(info, nh); +} + +static void nh_notifier_info_fini(struct nh_notifier_info *info) +{ + if (info->is_grp) + nh_notifier_grp_info_fini(info); + else + nh_notifier_single_info_fini(info); +} + static int call_nexthop_notifiers(struct net *net, enum nexthop_event_type event_type, struct nexthop *nh, struct netlink_ext_ack *extack) { + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; int err; + ASSERT_RTNL(); + + if (nexthop_notifiers_is_empty(net)) + return 0; + + err = nh_notifier_info_init(&info, nh); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); + return err; + } + err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, event_type, nh); + nh_notifier_info_fini(&info); + return notifier_to_errno(err); } From 1ec69d187cb8f564af570df7edf2c500dcb13702 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:26 +0200 Subject: [PATCH 04/18] nexthop: vxlan: Convert to new notification info Convert the sole listener of the nexthop notification chain (the VXLAN driver) to the new notification info. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/vxlan.c | 9 +++++++-- net/ipv4/nexthop.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 876679af6f7cf..b9db20b4ebfd7 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4684,9 +4684,14 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh) static int vxlan_nexthop_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct nexthop *nh = ptr; + struct nh_notifier_info *info = ptr; + struct nexthop *nh; + + if (event != NEXTHOP_EVENT_DEL) + return NOTIFY_DONE; - if (!nh || event != NEXTHOP_EVENT_DEL) + nh = nexthop_find_by_id(info->net, info->id); + if (!nh) return NOTIFY_DONE; vxlan_fdb_nh_flush(nh); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 85a595883222c..1d66f2439063b 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -151,7 +151,7 @@ static int call_nexthop_notifiers(struct net *net, } err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, - event_type, nh); + event_type, &info); nh_notifier_info_fini(&info); return notifier_to_errno(err); From 968a83f8cf6fd5a107289c57ee3197a52c72f02c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:27 +0200 Subject: [PATCH 05/18] rtnetlink: Add RTNH_F_TRAP flag The flag indicates to user space that the nexthop is not programmed to forward packets in hardware, but rather to trap them to the CPU. This is needed, for example, when the MAC of the nexthop neighbour is not resolved and packets should reach the CPU to trigger neighbour resolution. The flag will be used in subsequent patches by netdevsim to test nexthop objects programming to device drivers and in the future by mlxsw as well. Changes since RFC: * Reword commit message Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/uapi/linux/rtnetlink.h | 6 ++++-- net/ipv4/fib_semantics.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d1325ffb00608..2ffbef5da6c1f 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -396,11 +396,13 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ -#define RTNH_F_OFFLOAD 8 /* offloaded route */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ #define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ #define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ -#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD) +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1f75dc686b6b6..f70b9a0c49579 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1644,6 +1644,8 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); if (nhc->nhc_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; + if (nhc->nhc_flags & RTNH_F_TRAP) + *flags |= RTNH_F_TRAP; if (!skip_oif && nhc->nhc_dev && nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) From e95f2592f633a334b175003f13f42d3c217dc657 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:28 +0200 Subject: [PATCH 06/18] nexthop: Allow setting "offload" and "trap" indications on nexthops Add a function that can be called by device drivers to set "offload" or "trap" indication on nexthops following nexthop notifications. Changes since RFC: * s/nexthop_hw_flags_set/nexthop_set_hw_flags/ Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 1 + net/ipv4/nexthop.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 4a17b040b5020..aa7ac12c35e24 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -145,6 +145,7 @@ struct nh_notifier_info { int register_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1d66f2439063b..d1a1600aee18d 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2081,6 +2081,27 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) } EXPORT_SYMBOL(unregister_nexthop_notifier); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) +{ + struct nexthop *nexthop; + + rcu_read_lock(); + + nexthop = nexthop_find_by_id(net, id); + if (!nexthop) + goto out; + + nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); + if (offload) + nexthop->nh_flags |= RTNH_F_OFFLOAD; + if (trap) + nexthop->nh_flags |= RTNH_F_TRAP; + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(nexthop_set_hw_flags); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); From 732d167bf5f53a8c1e8c53cf7dbffe2a13f63752 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:29 +0200 Subject: [PATCH 07/18] nexthop: Emit a notification when a nexthop is added Emit a notification in the nexthop notification chain when a new nexthop is added (not replaced). The nexthop can either be a new group or a single nexthop. The notification is sent after the nexthop is inserted into the red-black tree, as listeners might need to callback into the nexthop code with the nexthop ID in order to mark the nexthop as offloaded. A 'REPLACE' notification is emitted instead of 'ADD' as the distinction between the two is not important for in-kernel listeners. In case the listener is not familiar with the encoded nexthop ID, it can simply treat it as a new one. This is also consistent with the route offload API. Changes since RFC: * Reword commit message Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 3 ++- net/ipv4/nexthop.c | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index aa7ac12c35e24..9c85199b826ef 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -105,7 +105,8 @@ struct nexthop { }; enum nexthop_event_type { - NEXTHOP_EVENT_DEL + NEXTHOP_EVENT_DEL, + NEXTHOP_EVENT_REPLACE, }; struct nh_notifier_single_info { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d1a1600aee18d..4e9d0395f959b 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1278,7 +1278,11 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); - rc = 0; + + rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); + if (rc) + rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); + out: if (!rc) { nh_base_seq_inc(net); From d144cc5f4f4eaf0cd2e7a1a6369054da583f6c93 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:30 +0200 Subject: [PATCH 08/18] nexthop: Emit a notification when a nexthop group is replaced Emit a notification in the nexthop notification chain when an existing nexthop group is replaced. The notification is emitted after all the validation checks were performed, but before the new configuration (i.e., 'struct nh_grp') is pointed at by the old shell (i.e., 'struct nexthop'). This prevents the need to perform rollback in case the notification is vetoed. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 4e9d0395f959b..3f43693498ee8 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1050,13 +1050,17 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, struct netlink_ext_ack *extack) { struct nh_group *oldg, *newg; - int i; + int i, err; if (!new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + oldg = rtnl_dereference(old->nh_grp); newg = rtnl_dereference(new->nh_grp); From 8c09c9f9d846cdd8a92604c591132985b04fd1d6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:31 +0200 Subject: [PATCH 09/18] nexthop: Emit a notification when a single nexthop is replaced The notification is emitted after all the validation checks were performed, but before the new configuration (i.e., 'struct nh_info') is pointed at by the old shell (i.e., 'struct nexthop'). This prevents the need to perform rollback in case the notification is vetoed. The next patch will also emit a replace notification for all the nexthop groups in which the nexthop is used. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 3f43693498ee8..11bfb1eb7f84a 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1100,12 +1100,22 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, struct netlink_ext_ack *extack) { struct nh_info *oldi, *newi; + int err; if (new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + + /* Hardware flags were set on 'old' as 'new' is not in the red-black + * tree. Therefore, inherit the flags from 'old' to 'new'. + */ + new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); + oldi = rtnl_dereference(old->nh_info); newi = rtnl_dereference(new->nh_info); From f17bc33d7412bcca58825273d9f4abf84a87c4cb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:32 +0200 Subject: [PATCH 10/18] nexthop: Emit a notification when a nexthop group is modified When a single nexthop is replaced, the configuration of all the groups using the nexthop is effectively modified. In this case, emit a notification in the nexthop notification chain for each modified group so that listeners would not need to keep track of which nexthops are member in which groups. The notification can only be emitted after the new configuration (i.e., 'struct nh_info') is pointed at by the old shell (i.e., 'struct nexthop'). Before that the configuration of the nexthop groups is still the same as before the replacement. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 11bfb1eb7f84a..0e8e753956f1e 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1099,7 +1099,9 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { + u8 old_protocol, old_nh_flags; struct nh_info *oldi, *newi; + struct nh_grp_entry *nhge; int err; if (new->is_group) { @@ -1122,18 +1124,29 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, newi->nh_parent = old; oldi->nh_parent = new; + old_protocol = old->protocol; + old_nh_flags = old->nh_flags; + old->protocol = new->protocol; old->nh_flags = new->nh_flags; rcu_assign_pointer(old->nh_info, newi); rcu_assign_pointer(new->nh_info, oldi); + /* Send a replace notification for all the groups using the nexthop. */ + list_for_each_entry(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, + extack); + if (err) + goto err_notify; + } + /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially * update IPv4 indication in all the groups using the nexthop. */ if (oldi->family == AF_INET && newi->family == AF_INET6) { - struct nh_grp_entry *nhge; - list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; struct nh_group *nhg; @@ -1144,6 +1157,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, } return 0; + +err_notify: + rcu_assign_pointer(new->nh_info, newi); + rcu_assign_pointer(old->nh_info, oldi); + old->nh_flags = old_nh_flags; + old->protocol = old_protocol; + oldi->nh_parent = old; + newi->nh_parent = new; + list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack); + } + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); + return err; } static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, From 833a1065eeb14437a9a0dfa9dad06ea09894e0b5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:33 +0200 Subject: [PATCH 11/18] nexthop: Emit a notification when a nexthop group is reduced When a single nexthop is deleted, the configuration of all the groups using the nexthop is effectively modified. In this case, emit a notification in the nexthop notification chain for each modified group so that listeners would not need to keep track of which nexthops are member in which groups. In the rare cases where the notification fails, emit an error to the kernel log. This is done by allocating extack on the stack and printing the error logged by the listener that rejected the notification. Changes since RFC: * Allocate extack on the stack Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0e8e753956f1e..d5a2c28bea490 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -892,9 +892,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; + struct netlink_ext_ack extack; struct nexthop *nh = nhge->nh; struct nh_group *nhg, *newg; - int i, j; + int i, j, err; WARN_ON(!nh); @@ -942,6 +943,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, list_del(&nhge->nh_list); nexthop_put(nhge->nh); + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); + if (err) + pr_err("%s\n", extack._msg); + if (nlinfo) nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); } From ce7e9c8a080b6daca646a7740d916ed547503a12 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:34 +0200 Subject: [PATCH 12/18] nexthop: Pass extack to register_nexthop_notifier() This will be used by the next patch which extends the function to replay all the existing nexthops to the notifier block being registered. Device drivers will be able to pass extack to the function since it is passed to them upon reload from devlink. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/vxlan.c | 3 ++- include/net/nexthop.h | 3 ++- net/ipv4/nexthop.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b9db20b4ebfd7..183e708dc6cb3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4711,7 +4711,8 @@ static __net_init int vxlan_init_net(struct net *net) for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vn->sock_list[h]); - return register_nexthop_notifier(net, &vn->nexthop_notifier_block); + return register_nexthop_notifier(net, &vn->nexthop_notifier_block, + NULL); } static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 9c85199b826ef..226930d66b637 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -144,7 +144,8 @@ struct nh_notifier_info { }; }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb); +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d5a2c28bea490..9a235d5c56701 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2118,7 +2118,8 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return blocking_notifier_chain_register(&net->nexthop.notifier_chain, nb); From 975ff7f3324af33e218e3d0abf5797c2b67f97d7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:35 +0200 Subject: [PATCH 13/18] nexthop: Replay nexthops when registering a notifier When registering a new notifier to the nexthop notification chain, replay all the existing nexthops to the new notifier so that it will have a complete picture of the available nexthops. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 9a235d5c56701..5e1b22d4f939f 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -157,6 +157,27 @@ static int call_nexthop_notifiers(struct net *net, return notifier_to_errno(err); } +static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, + enum nexthop_event_type event_type, + struct nexthop *nh, + struct netlink_ext_ack *extack) +{ + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; + int err; + + err = nh_notifier_info_init(&info, nh); + if (err) + return err; + + err = nb->notifier_call(nb, event_type, &info); + nh_notifier_info_fini(&info); + + return notifier_to_errno(err); +} + static unsigned int nh_dev_hashfn(unsigned int val) { unsigned int mask = NH_DEV_HASHSIZE - 1; @@ -2118,11 +2139,40 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; +static int nexthops_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct rb_root *root = &net->nexthop.rb_root; + struct rb_node *node; + int err = 0; + + for (node = rb_first(root); node; node = rb_next(node)) { + struct nexthop *nh; + + nh = rb_entry(node, struct nexthop, rb_node); + err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, + extack); + if (err) + break; + } + + return err; +} + int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { - return blocking_notifier_chain_register(&net->nexthop.notifier_chain, - nb); + int err; + + rtnl_lock(); + err = nexthops_dump(net, nb, extack); + if (err) + goto unlock; + err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, + nb); +unlock: + rtnl_unlock(); + return err; } EXPORT_SYMBOL(register_nexthop_notifier); From bbea126c2badf677925e46083d708ca292c530f6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:36 +0200 Subject: [PATCH 14/18] nexthop: Remove in-kernel route notifications when nexthop changes Remove in-kernel route notifications when the configuration of their nexthop changes. These notifications are unnecessary because the route still uses the same nexthop ID. A separate notification for the nexthop change itself is now sent in the nexthop notification chain. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/fib_trie.c | 9 --------- net/ipv6/route.c | 5 ----- 2 files changed, 14 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ffc5332f13906..28117c05dc353 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2100,15 +2100,6 @@ static void __fib_info_notify_update(struct net *net, struct fib_table *tb, rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa, KEYLENGTH - fa->fa_slen, tb->tb_id, info, NLM_F_REPLACE); - - /* call_fib_entry_notifiers will be removed when - * in-kernel notifier is implemented and supported - * for nexthop objects - */ - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - n->key, - KEYLENGTH - fa->fa_slen, fa, - NULL); } } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7e0ce7af82346..f91a689edafd2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6039,11 +6039,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, struct sk_buff *skb; int err = -ENOBUFS; - /* call_fib6_entry_notifiers will be removed when in-kernel notifier - * is implemented and supported for nexthop objects - */ - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL); - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; From 35266255d61c777abd69c2faaa44c92b5a0483c2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:37 +0200 Subject: [PATCH 15/18] netdevsim: Add devlink resource for nexthops The Spectrum ASIC has a dedicated table where nexthops (i.e., adjacency entries) are populated. The size of this table can be controlled via devlink-resource. Add such a resource to netdevsim so that its occupancy will reflect the number of nexthop objects currently programmed to the device. By limiting the size of the resource, error paths could be exercised and tested. Example output: # devlink resource show netdevsim/netdevsim10 netdevsim/netdevsim10: name IPv4 size unlimited unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none resources: name fib size unlimited occ 4 unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none name fib-rules size unlimited occ 3 unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none name IPv6 size unlimited unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none resources: name fib size unlimited occ 1 unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none name fib-rules size unlimited occ 2 unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none name nexthops size unlimited occ 0 unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables none Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../networking/devlink/netdevsim.rst | 3 ++- drivers/net/netdevsim/dev.c | 6 +++++ drivers/net/netdevsim/fib.c | 23 ++++++++++++++++++- drivers/net/netdevsim/netdevsim.h | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 2a266b7e7b38c..02c2d20dc6732 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -46,7 +46,7 @@ Resources ========= The ``netdevsim`` driver exposes resources to control the number of FIB -entries and FIB rule entries that the driver will allow. +entries, FIB rule entries and nexthops that the driver will allow. .. code:: shell @@ -54,6 +54,7 @@ entries and FIB rule entries that the driver will allow. $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16 $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64 $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16 + $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16 $ devlink dev reload netdevsim/netdevsim0 Driver-specific Traps diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d070614176755..49cc1fea9e02b 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -324,6 +324,12 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } + /* Resources for nexthops */ + err = devlink_resource_register(devlink, "nexthops", (u64)-1, + NSIM_RESOURCE_NEXTHOPS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + ¶ms); + out: return err; } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index deea17a0e79c9..3ec0f8896efe3 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -42,6 +42,7 @@ struct nsim_fib_data { struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; + struct nsim_fib_entry nexthops; struct rhashtable fib_rt_ht; struct list_head fib_rt_list; spinlock_t fib_lock; /* Protects hashtable, list and accounting */ @@ -104,6 +105,9 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: return 0; } @@ -129,6 +133,9 @@ static void nsim_fib_set_max(struct nsim_fib_data *fib_data, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: WARN_ON(1); return; @@ -866,12 +873,20 @@ static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); } +static u64 nsim_fib_nexthops_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_NEXTHOPS, false); +} + static void nsim_fib_set_max_all(struct nsim_fib_data *data, struct devlink *devlink) { enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, }; int i; @@ -929,6 +944,10 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_fib_ipv6_rules_res_occ_get, data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_NEXTHOPS, + nsim_fib_nexthops_res_occ_get, + data); return data; err_rhashtable_destroy: @@ -941,6 +960,8 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) { + devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_NEXTHOPS); devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV6_FIB_RULES); devlink_resource_occ_get_unregister(devlink, diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 827fc80f50a07..698be048041b9 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -158,6 +158,7 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, }; struct nsim_dev_health { From 8fa84742d66625281a14b5db4c1f64f06c5b106a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:38 +0200 Subject: [PATCH 16/18] netdevsim: Add dummy implementation for nexthop offload Implement dummy nexthop "offload" in the driver by storing currently "programmed" nexthops in a hash table. Each nexthop in the hash table is marked with "trap" indication and increments the nexthops resource occupancy. This will later allow us to test the nexthop offload API on top of netdevsim. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/fib.c | 232 +++++++++++++++++++++++++++++++++++- 1 file changed, 229 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 3ec0f8896efe3..9bdd9b9693e17 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "netdevsim.h" @@ -46,6 +47,8 @@ struct nsim_fib_data { struct rhashtable fib_rt_ht; struct list_head fib_rt_list; spinlock_t fib_lock; /* Protects hashtable, list and accounting */ + struct notifier_block nexthop_nb; + struct rhashtable nexthop_ht; struct devlink *devlink; }; @@ -87,6 +90,19 @@ static const struct rhashtable_params nsim_fib_rt_ht_params = { .automatic_shrinking = true, }; +struct nsim_nexthop { + struct rhash_head ht_node; + u64 occ; + u32 id; +}; + +static const struct rhashtable_params nsim_nexthop_ht_params = { + .key_offset = offsetof(struct nsim_nexthop, id), + .head_offset = offsetof(struct nsim_nexthop, ht_node), + .key_len = sizeof(u32), + .automatic_shrinking = true, +}; + u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max) { @@ -845,6 +861,196 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) data->ipv6.rules.num = 0ULL; } +static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + u64 occ = 0; + int i; + + nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); + if (!nexthop) + return NULL; + + nexthop->id = info->id; + + /* Determine the number of nexthop entries the new nexthop will + * occupy. + */ + + if (!info->is_grp) { + occ = 1; + goto out; + } + + for (i = 0; i < info->nh_grp->num_nh; i++) + occ += info->nh_grp->nh_entries[i].weight; + +out: + nexthop->occ = occ; + return nexthop; +} + +static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop) +{ + kfree(nexthop); +} + +static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ, + bool add, struct netlink_ext_ack *extack) +{ + int err = 0; + + if (add) { + if (data->nexthops.num + occ <= data->nexthops.max) { + data->nexthops.num += occ; + } else { + err = -ENOSPC; + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); + } + } else { + if (WARN_ON(occ > data->nexthops.num)) + return -EINVAL; + data->nexthops.num -= occ; + } + + return err; +} + +static int nsim_nexthop_add(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_insert_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to insert nexthop"); + goto err_nexthop_dismiss; + } + + nexthop_set_hw_flags(net, nexthop->id, false, true); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_replace(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct nsim_nexthop *nexthop_old, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_replace_fast(&data->nexthop_ht, + &nexthop_old->ht_node, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace nexthop"); + goto err_nexthop_dismiss; + } + + nexthop_set_hw_flags(net, nexthop->id, false, true); + nsim_nexthop_account(data, nexthop_old->occ, false, extack); + nsim_nexthop_destroy(nexthop_old); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_insert(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop, *nexthop_old; + int err; + + nexthop = nsim_nexthop_create(data, info); + if (!nexthop) + return -ENOMEM; + + nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop_old) + err = nsim_nexthop_add(data, nexthop, info->extack); + else + err = nsim_nexthop_replace(data, nexthop, nexthop_old, + info->extack); + + if (err) + nsim_nexthop_destroy(nexthop); + + return err; +} + +static void nsim_nexthop_remove(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + + nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop) + return; + + rhashtable_remove_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + nsim_nexthop_account(data, nexthop->occ, false, info->extack); + nsim_nexthop_destroy(nexthop); +} + +static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + nexthop_nb); + struct nh_notifier_info *info = ptr; + int err = 0; + + ASSERT_RTNL(); + + switch (event) { + case NEXTHOP_EVENT_REPLACE: + err = nsim_nexthop_insert(data, info); + break; + case NEXTHOP_EVENT_DEL: + nsim_nexthop_remove(data, info); + break; + default: + break; + } + + return notifier_from_errno(err); +} + +static void nsim_nexthop_free(void *ptr, void *arg) +{ + struct nsim_nexthop *nexthop = ptr; + struct nsim_fib_data *data = arg; + struct net *net; + + net = devlink_net(data->devlink); + nexthop_set_hw_flags(net, nexthop->id, false, false); + nsim_nexthop_account(data, nexthop->occ, false, NULL); + nsim_nexthop_destroy(nexthop); +} + static u64 nsim_fib_ipv4_resource_occ_get(void *priv) { struct nsim_fib_data *data = priv; @@ -912,20 +1118,32 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, return ERR_PTR(-ENOMEM); data->devlink = devlink; + err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); + if (err) + goto err_data_free; + spin_lock_init(&data->fib_lock); INIT_LIST_HEAD(&data->fib_rt_list); err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); if (err) - goto err_data_free; + goto err_rhashtable_nexthop_destroy; nsim_fib_set_max_all(data, devlink); + data->nexthop_nb.notifier_call = nsim_nexthop_event_nb; + err = register_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb, + extack); + if (err) { + pr_err("Failed to register nexthop notifier\n"); + goto err_rhashtable_fib_destroy; + } + data->fib_nb.notifier_call = nsim_fib_event_nb; err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, nsim_fib_dump_inconsistent, extack); if (err) { pr_err("Failed to register fib notifier\n"); - goto err_rhashtable_destroy; + goto err_nexthop_nb_unregister; } devlink_resource_occ_get_register(devlink, @@ -950,9 +1168,14 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, data); return data; -err_rhashtable_destroy: +err_nexthop_nb_unregister: + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); +err_rhashtable_fib_destroy: rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); +err_rhashtable_nexthop_destroy: + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); err_data_free: kfree(data); return ERR_PTR(err); @@ -971,8 +1194,11 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); kfree(data); } From 66e58bf0707b126e2da90eca8cf00845757e9fbe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:39 +0200 Subject: [PATCH 17/18] netdevsim: Allow programming routes with nexthop objects Previous patches added the ability to program nexthop objects. Therefore, no longer forbid the programming of routes that point to such objects. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/fib.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 9bdd9b9693e17..45d8a7790bd5b 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -412,11 +412,6 @@ static int nsim_fib4_event(struct nsim_fib_data *data, fen_info = container_of(info, struct fib_entry_notifier_info, info); - if (fen_info->fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return 0; - } - switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib4_rt_insert(data, fen_info); @@ -727,11 +722,6 @@ static int nsim_fib6_event(struct nsim_fib_data *data, fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - if (fen6_info->rt->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); - return 0; - } - if (fen6_info->rt->fib6_src.plen) { NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported"); return 0; From 21584e6a92bd2a85411793c0da3d48ab327e9b72 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Nov 2020 15:30:40 +0200 Subject: [PATCH 18/18] selftests: netdevsim: Add test for nexthop offload API Test various aspects of the nexthop offload API on top of the netdevsim implementation. Both good and bad flows are tested. Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../drivers/net/netdevsim/nexthop.sh | 436 ++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/nexthop.sh diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 0000000000000..be0c1b5ee6b8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS