Skip to content

Commit

Permalink
IPv6 host and subnet anycast route in VRF to be configured with nhop (#…
Browse files Browse the repository at this point in the history
…108)

interface as actual netdev instead of VRF L3MDEV

When we configure IPv6 route with plen <127(127 is used for inter router
links), kernel auto-generates subnet-router anycast address. This is as
per RFC 4291(section Required Anycast address) and RFC 6164. In kernel
version 4.9.x these routes are autogenerated with nhop interface as VRF
L3MDEV instead of the interface on which these address are configured.
Also host address(/128) were also pointing to same L3MDEV.

This was creating issue esp during warm-reboot since FRR installs these
routes via fpm to APP_DB ROUTE_TABLE pointing to VRF name. Warm boot
will be blocked since routerorch will keep waiting for these routes to
be uninstalled.
Also these routes will not be installed to hardware since nexthop ifname
is L3MDEV.

Backporting patch from commit:
torvalds/linux@4832c30#diff-f856f42c9bd7ceebf824b5ccadc83280
  • Loading branch information
preetham-singh authored and lguohan committed Oct 10, 2019
1 parent 6d55e9a commit 5786674
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 0 deletions.
190 changes: 190 additions & 0 deletions patch/0000-net-ipv6-ll-anycast-mcast-routes-on-dev.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
From 90df429b3fdfac074351327ca8681f940480229a Mon Sep 17 00:00:00 2001
From: Preetham Singh <preetham.singh@broadcom.com>
Date: Sun, 8 Sep 2019 09:47:17 +0000
Subject: [PATCH] From 0b4074c953a707e8b3103fefeeaa5b439f869b3b Mon Sep 17
00:00:00 2001 Subject: [PATCH]net: IPv6 anycast, mcast routes with nhop
interface

All auto generated IPv6 LL, anycast and multicast routes are installed
with nhop as l3mdev instead of interface.

Signed-off-by: Preetham Singh <preetham.singh@broadcom.com>
---
net/ipv6/addrconf.c | 41 -----------------------------------------
net/ipv6/icmp.c | 15 +++++++++++++--
net/ipv6/route.c | 45 +++++++++++++++++++++++++++++++++++----------
3 files changed, 48 insertions(+), 53 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4a5d3bbf..a8bd7807 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2969,9 +2969,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
static void init_loopback(struct net_device *dev)
{
struct inet6_dev *idev;
- struct net_device *sp_dev;
- struct inet6_ifaddr *sp_ifa;
- struct rt6_info *sp_rt;

/* ::1 */

@@ -2985,44 +2982,6 @@ static void init_loopback(struct net_device *dev)

add_addr(idev, &in6addr_loopback, 128, IFA_HOST);

- /* Add routes to other interface's IPv6 addresses */
- for_each_netdev(dev_net(dev), sp_dev) {
- if (!strcmp(sp_dev->name, dev->name))
- continue;
-
- idev = __in6_dev_get(sp_dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
-
- if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
- continue;
-
- if (sp_ifa->rt) {
- /* This dst has been added to garbage list when
- * lo device down, release this obsolete dst and
- * reallocate a new router for ifa.
- */
- if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
- ip6_rt_put(sp_ifa->rt);
- sp_ifa->rt = NULL;
- } else {
- continue;
- }
- }
-
- sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
-
- /* Failure cases are ignored */
- if (!IS_ERR(sp_rt)) {
- sp_ifa->rt = sp_rt;
- ip6_ins_rt(sp_rt);
- }
- }
- read_unlock_bh(&idev->lock);
- }
}

void addrconf_add_linklocal(struct inet6_dev *idev,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2772004b..f18128fb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -445,9 +445,20 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
* Source addr check
*/

- if (__ipv6_addr_needs_scope_id(addr_type))
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
iif = skb->dev->ifindex;
- else {
+
+ /* for local packets, get the real device index */
+ if (iif == LOOPBACK_IFINDEX) {
+ dst = skb_dst(skb);
+ if (dst) {
+ struct rt6_info *rt;
+
+ rt = container_of(dst, struct rt6_info, dst);
+ iif = rt->rt6i_idev->dev->ifindex;
+ }
+ }
+ } else {
dst = skb_dst(skb);
iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cbc066d2..7dcd34f5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -940,10 +940,35 @@ int ip6_ins_rt(struct rt6_info *rt)
return __ip6_ins_rt(rt, &info, &mxc);
}

+/* called with rcu_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+ /* for copies of local routes, dst->dev needs to be the
+ * device if it is a master device, the master device if
+ * device is enslaved, and the loopback as the default
+ */
+ if (netif_is_l3_slave(dev) &&
+ !rt6_need_strict(&rt->rt6i_dst.addr))
+ dev = l3mdev_master_dev_rcu(dev);
+ else if (!netif_is_l3_master(dev))
+ dev = dev_net(dev)->loopback_dev;
+ /* last case is netif_is_l3_master(dev) is true in which
+ * case we want dev returned to be dev
+ */
+ }
+
+ return dev;
+}
+
+
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct net_device *dev;
struct rt6_info *rt;

/*
@@ -953,7 +978,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = (struct rt6_info *)ort->dst.from;

- rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(ort);
+ rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+ rcu_read_unlock();

if (!rt)
return NULL;
@@ -983,9 +1011,12 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
struct rt6_info *pcpu_rt;
+ struct net_device *dev;

- pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
- rt->dst.dev, rt->dst.flags);
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(rt);
+ pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+ rcu_read_unlock();

if (!pcpu_rt)
return NULL;
@@ -2623,15 +2654,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct net_device *dev = net->loopback_dev;
+ struct net_device *dev = idev->dev;
struct rt6_info *rt;

- /* use L3 Master device as loopback for host routes if device
- * is enslaved and address is not link local or multicast
- */
- if (!rt6_need_strict(addr))
- dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
-
rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
--
2.18.0

1 change: 1 addition & 0 deletions patch/series
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ linux-4.16-firmware-dmi-handle-missing-DMI-data-gracefully.patch
mellanox-backport-introduce-psample-a-new-genetlink-channel.patch
mellanox-backport-introduce-tc-sample-action.patch
kernel-enable-psample-and-act_sample-drivers.patch
0000-net-ipv6-ll-anycast-mcast-routes-on-dev.patch
#
# This series applies on GIT commit 1451b36b2b0d62178e42f648d8a18131af18f7d8
# Tkernel-sched-core-fix-cgroup-fork-race.patch
Expand Down

0 comments on commit 5786674

Please sign in to comment.