Skip to content

Commit

Permalink
bonding: initial RCU conversion
Browse files Browse the repository at this point in the history
This patch does the initial bonding conversion to RCU. After it the
following modes are protected by RCU alone: roundrobin, active-backup,
broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for
reading, and will be dealt with later. curr_active_slave needs to be
dereferenced via rcu in the converted modes because the only thing
protecting the slave after this patch is rcu_read_lock, so we need the
proper barrier for weakly ordered archs and to make sure we don't have
stale pointer. It's not tagged with __rcu yet because there's still work
to be done to remove the curr_slave_lock, so sparse will complain when
rcu_assign_pointer and rcu_dereference are used, but the alternative to use
rcu_dereference_protected would've created much bigger code churn which is
more difficult to test and review. That will be converted in time.

1. Active-backup mode
 1.1 Perf recording while doing iperf -P 4
  - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU
                 in bonding
  - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU
                 in bonding
 1.2. Bandwidth measurements
  - old bonding: 16.1 gbps consistently
  - new bonding: 17.5 gbps consistently

2. Round-robin mode
 2.1 Perf recording while doing iperf -P 4
  - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU
                 in bonding
  - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU
                 in bonding
 2.2 Bandwidth measurements
  - old bonding: 8 gbps (variable due to packet reorderings)
  - new bonding: 10 gbps (variable due to packet reorderings)

Of course the latency has improved in all converted modes, and moreover
while
doing enslave/release (since it doesn't affect tx anymore).

Also I've stress tested all modes doing enslave/release in a loop while
transmitting traffic.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
nikolay@redhat.com authored and davem330 committed Aug 1, 2013
1 parent 1507722 commit 278b208
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 29 deletions.
2 changes: 2 additions & 0 deletions drivers/net/bonding/bond_3ad.c
Original file line number Diff line number Diff line change
Expand Up @@ -2426,6 +2426,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
struct ad_info ad_info;
int res = 1;

read_lock(&bond->lock);
if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
dev->name);
Expand Down Expand Up @@ -2475,6 +2476,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
}

out:
read_unlock(&bond->lock);
if (res) {
/* no suitable interface, frame not sent */
kfree_skb(skb);
Expand Down
6 changes: 4 additions & 2 deletions drivers/net/bonding/bond_alb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)

/* make sure that the curr_active_slave do not change during tx
*/
read_lock(&bond->lock);
read_lock(&bond->curr_slave_lock);

switch (ntohs(skb->protocol)) {
Expand Down Expand Up @@ -1441,11 +1442,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
}

read_unlock(&bond->curr_slave_lock);

read_unlock(&bond->lock);
if (res) {
/* no suitable interface, frame not sent */
kfree_skb(skb);
}

return NETDEV_TX_OK;
}

Expand Down Expand Up @@ -1663,7 +1665,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
}

swap_slave = bond->curr_active_slave;
bond->curr_active_slave = new_slave;
rcu_assign_pointer(bond->curr_active_slave, new_slave);

if (!new_slave || list_empty(&bond->slave_list))
return;
Expand Down
30 changes: 15 additions & 15 deletions drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
#include <linux/rculist.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
Expand Down Expand Up @@ -1037,7 +1038,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
if (new_active)
bond_set_slave_active_flags(new_active);
} else {
bond->curr_active_slave = new_active;
rcu_assign_pointer(bond->curr_active_slave, new_active);
}

if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
Expand Down Expand Up @@ -1127,7 +1128,7 @@ void bond_select_active_slave(struct bonding *bond)
*/
static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
{
list_add_tail(&new_slave->list, &bond->slave_list);
list_add_tail_rcu(&new_slave->list, &bond->slave_list);
bond->slave_cnt++;
}

Expand All @@ -1143,7 +1144,7 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
*/
static void bond_detach_slave(struct bonding *bond, struct slave *slave)
{
list_del(&slave->list);
list_del_rcu(&slave->list);
bond->slave_cnt--;
}

Expand Down Expand Up @@ -1751,7 +1752,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
* so we can change it without calling change_active_interface()
*/
if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP)
bond->curr_active_slave = new_slave;
rcu_assign_pointer(bond->curr_active_slave, new_slave);

break;
} /* switch(bond_mode) */
Expand Down Expand Up @@ -1951,7 +1952,7 @@ static int __bond_release_one(struct net_device *bond_dev,
}

if (all) {
bond->curr_active_slave = NULL;
rcu_assign_pointer(bond->curr_active_slave, NULL);
} else if (oldcurrent == slave) {
/*
* Note that we hold RTNL over this sequence, so there
Expand Down Expand Up @@ -1983,6 +1984,7 @@ static int __bond_release_one(struct net_device *bond_dev,

write_unlock_bh(&bond->lock);
unblock_netpoll_tx();
synchronize_rcu();

if (list_empty(&bond->slave_list)) {
call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
Expand Down Expand Up @@ -3811,7 +3813,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
int i = slave_id;

/* Here we start from the slave with slave_id */
bond_for_each_slave(bond, slave) {
bond_for_each_slave_rcu(bond, slave) {
if (--i < 0) {
if (slave_can_tx(slave)) {
bond_dev_queue_xmit(bond, skb, slave->dev);
Expand All @@ -3822,7 +3824,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)

/* Here we start from the first slave up to slave_id */
i = slave_id;
bond_for_each_slave(bond, slave) {
bond_for_each_slave_rcu(bond, slave) {
if (--i < 0)
break;
if (slave_can_tx(slave)) {
Expand All @@ -3848,7 +3850,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
* will send all of this type of traffic.
*/
if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
slave = bond->curr_active_slave;
slave = rcu_dereference(bond->curr_active_slave);
if (slave && slave_can_tx(slave))
bond_dev_queue_xmit(bond, skb, slave->dev);
else
Expand All @@ -3870,7 +3872,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;

slave = bond->curr_active_slave;
slave = rcu_dereference(bond->curr_active_slave);
if (slave)
bond_dev_queue_xmit(bond, skb, slave->dev);
else
Expand Down Expand Up @@ -3900,7 +3902,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave = NULL;

bond_for_each_slave(bond, slave) {
bond_for_each_slave_rcu(bond, slave) {
if (bond_is_last_slave(bond, slave))
break;
if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
Expand Down Expand Up @@ -3955,7 +3957,7 @@ static inline int bond_slave_override(struct bonding *bond,
return 1;

/* Find out if any slaves have the same mapping as this skb. */
bond_for_each_slave(bond, check_slave) {
bond_for_each_slave_rcu(bond, check_slave) {
if (check_slave->queue_id == skb->queue_mapping) {
slave = check_slave;
break;
Expand Down Expand Up @@ -4040,14 +4042,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (is_netpoll_tx_blocked(dev))
return NETDEV_TX_BUSY;

read_lock(&bond->lock);

rcu_read_lock();
if (!list_empty(&bond->slave_list))
ret = __bond_start_xmit(skb, dev);
else
kfree_skb(skb);

read_unlock(&bond->lock);
rcu_read_unlock();

return ret;
}
Expand Down
19 changes: 7 additions & 12 deletions drivers/net/bonding/bond_sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1243,16 +1243,16 @@ static ssize_t bonding_show_active_slave(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct slave *curr;
struct bonding *bond = to_bond(d);
struct slave *curr;
int count = 0;

read_lock(&bond->curr_slave_lock);
curr = bond->curr_active_slave;
read_unlock(&bond->curr_slave_lock);

rcu_read_lock();
curr = rcu_dereference(bond->curr_active_slave);
if (USES_PRIMARY(bond->params.mode) && curr)
count = sprintf(buf, "%s\n", curr->dev->name);
rcu_read_unlock();

return count;
}

Expand Down Expand Up @@ -1284,7 +1284,7 @@ static ssize_t bonding_store_active_slave(struct device *d,
if (!strlen(ifname) || buf[0] == '\n') {
pr_info("%s: Clearing current active slave.\n",
bond->dev->name);
bond->curr_active_slave = NULL;
rcu_assign_pointer(bond->curr_active_slave, NULL);
bond_select_active_slave(bond);
goto out;
}
Expand Down Expand Up @@ -1347,14 +1347,9 @@ static ssize_t bonding_show_mii_status(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct slave *curr;
struct bonding *bond = to_bond(d);

read_lock(&bond->curr_slave_lock);
curr = bond->curr_active_slave;
read_unlock(&bond->curr_slave_lock);

return sprintf(buf, "%s\n", curr ? "up" : "down");
return sprintf(buf, "%s\n", bond->curr_active_slave ? "up" : "down");
}
static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);

Expand Down
4 changes: 4 additions & 0 deletions drivers/net/bonding/bonding.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@
#define bond_for_each_slave(bond, pos) \
list_for_each_entry(pos, &(bond)->slave_list, list)

/* Caller must have rcu_read_lock */
#define bond_for_each_slave_rcu(bond, pos) \
list_for_each_entry_rcu(pos, &(bond)->slave_list, list)

/**
* bond_for_each_slave_reverse - iterate in reverse from a given position
* @bond: the bond holding this list
Expand Down

0 comments on commit 278b208

Please sign in to comment.