Skip to content

Commit 0622cab

Browse files
jay-vosburghdavem330
authored andcommitted
bonding: fix 802.3ad aggregator reselection
Since commit 7bb11dc ("bonding: unify all places where actor-oper key needs to be updated."), the logic in bonding to handle selection between multiple aggregators has not functioned. This affects only configurations wherein the bonding slaves connect to two discrete aggregators (e.g., two independent switches, each with LACP enabled), thus creating two separate aggregation groups within a single bond. The cause is a change in 7bb11dc to no longer set AD_PORT_BEGIN on a port after a link state change, which would cause the port to be reselected for attachment to an aggregator as if were newly added to the bond. We cannot restore the prior behavior, as it contradicts IEEE 802.1AX 5.4.12, which requires ports that "become inoperable" (lose carrier, setting port_enabled=false as per 802.1AX 5.4.7) to remain selected (i.e., assigned to the aggregator). As the port now remains selected, the aggregator selection logic is not invoked. A side effect of this change is that aggregators in bonding will now contain ports that are link down. The aggregator selection logic does not currently handle this situation correctly, causing incorrect aggregator selection. This patch makes two changes to repair the aggregator selection logic in bonding to function as documented and within the confines of the standard: First, the aggregator selection and related logic now utilizes the number of active ports per aggregator, not the number of selected ports (as some selected ports may be down). The ad_select "bandwidth" and "count" options only consider ports that are link up. Second, on any carrier state change of any slave, the aggregator selection logic is explicitly called to insure the correct aggregator is active. Reported-by: Veli-Matti Lintu <veli-matti.lintu@opinsys.fi> Fixes: 7bb11dc ("bonding: unify all places where actor-oper key needs to be updated.") Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 70a0dec commit 0622cab

File tree

1 file changed

+45
-19
lines changed

1 file changed

+45
-19
lines changed

drivers/net/bonding/bond_3ad.c

+45-19
Original file line numberDiff line numberDiff line change
@@ -657,46 +657,61 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
657657
}
658658
}
659659

660+
static int __agg_active_ports(struct aggregator *agg)
661+
{
662+
struct port *port;
663+
int active = 0;
664+
665+
for (port = agg->lag_ports; port;
666+
port = port->next_port_in_aggregator) {
667+
if (port->is_enabled)
668+
active++;
669+
}
670+
671+
return active;
672+
}
673+
660674
/**
661675
* __get_agg_bandwidth - get the total bandwidth of an aggregator
662676
* @aggregator: the aggregator we're looking at
663677
*
664678
*/
665679
static u32 __get_agg_bandwidth(struct aggregator *aggregator)
666680
{
681+
int nports = __agg_active_ports(aggregator);
667682
u32 bandwidth = 0;
668683

669-
if (aggregator->num_of_ports) {
684+
if (nports) {
670685
switch (__get_link_speed(aggregator->lag_ports)) {
671686
case AD_LINK_SPEED_1MBPS:
672-
bandwidth = aggregator->num_of_ports;
687+
bandwidth = nports;
673688
break;
674689
case AD_LINK_SPEED_10MBPS:
675-
bandwidth = aggregator->num_of_ports * 10;
690+
bandwidth = nports * 10;
676691
break;
677692
case AD_LINK_SPEED_100MBPS:
678-
bandwidth = aggregator->num_of_ports * 100;
693+
bandwidth = nports * 100;
679694
break;
680695
case AD_LINK_SPEED_1000MBPS:
681-
bandwidth = aggregator->num_of_ports * 1000;
696+
bandwidth = nports * 1000;
682697
break;
683698
case AD_LINK_SPEED_2500MBPS:
684-
bandwidth = aggregator->num_of_ports * 2500;
699+
bandwidth = nports * 2500;
685700
break;
686701
case AD_LINK_SPEED_10000MBPS:
687-
bandwidth = aggregator->num_of_ports * 10000;
702+
bandwidth = nports * 10000;
688703
break;
689704
case AD_LINK_SPEED_20000MBPS:
690-
bandwidth = aggregator->num_of_ports * 20000;
705+
bandwidth = nports * 20000;
691706
break;
692707
case AD_LINK_SPEED_40000MBPS:
693-
bandwidth = aggregator->num_of_ports * 40000;
708+
bandwidth = nports * 40000;
694709
break;
695710
case AD_LINK_SPEED_56000MBPS:
696-
bandwidth = aggregator->num_of_ports * 56000;
711+
bandwidth = nports * 56000;
697712
break;
698713
case AD_LINK_SPEED_100000MBPS:
699-
bandwidth = aggregator->num_of_ports * 100000;
714+
bandwidth = nports * 100000;
700715
break;
701716
default:
702717
bandwidth = 0; /* to silence the compiler */
@@ -1530,10 +1545,10 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
15301545

15311546
switch (__get_agg_selection_mode(curr->lag_ports)) {
15321547
case BOND_AD_COUNT:
1533-
if (curr->num_of_ports > best->num_of_ports)
1548+
if (__agg_active_ports(curr) > __agg_active_ports(best))
15341549
return curr;
15351550

1536-
if (curr->num_of_ports < best->num_of_ports)
1551+
if (__agg_active_ports(curr) < __agg_active_ports(best))
15371552
return best;
15381553

15391554
/*FALLTHROUGH*/
@@ -1561,8 +1576,14 @@ static int agg_device_up(const struct aggregator *agg)
15611576
if (!port)
15621577
return 0;
15631578

1564-
return netif_running(port->slave->dev) &&
1565-
netif_carrier_ok(port->slave->dev);
1579+
for (port = agg->lag_ports; port;
1580+
port = port->next_port_in_aggregator) {
1581+
if (netif_running(port->slave->dev) &&
1582+
netif_carrier_ok(port->slave->dev))
1583+
return 1;
1584+
}
1585+
1586+
return 0;
15661587
}
15671588

15681589
/**
@@ -1610,7 +1631,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
16101631

16111632
agg->is_active = 0;
16121633

1613-
if (agg->num_of_ports && agg_device_up(agg))
1634+
if (__agg_active_ports(agg) && agg_device_up(agg))
16141635
best = ad_agg_selection_test(best, agg);
16151636
}
16161637

@@ -1622,7 +1643,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
16221643
* answering partner.
16231644
*/
16241645
if (active && active->lag_ports &&
1625-
active->lag_ports->is_enabled &&
1646+
__agg_active_ports(active) &&
16261647
(__agg_has_partner(active) ||
16271648
(!__agg_has_partner(active) &&
16281649
!__agg_has_partner(best)))) {
@@ -2133,7 +2154,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
21332154
else
21342155
temp_aggregator->lag_ports = temp_port->next_port_in_aggregator;
21352156
temp_aggregator->num_of_ports--;
2136-
if (temp_aggregator->num_of_ports == 0) {
2157+
if (__agg_active_ports(temp_aggregator) == 0) {
21372158
select_new_active_agg = temp_aggregator->is_active;
21382159
ad_clear_agg(temp_aggregator);
21392160
if (select_new_active_agg) {
@@ -2432,7 +2453,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave)
24322453
*/
24332454
void bond_3ad_handle_link_change(struct slave *slave, char link)
24342455
{
2456+
struct aggregator *agg;
24352457
struct port *port;
2458+
bool dummy;
24362459

24372460
port = &(SLAVE_AD_INFO(slave)->port);
24382461

@@ -2459,6 +2482,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
24592482
port->is_enabled = false;
24602483
ad_update_actor_keys(port, true);
24612484
}
2485+
agg = __get_first_agg(port);
2486+
ad_agg_selection_logic(agg, &dummy);
2487+
24622488
netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n",
24632489
port->actor_port_number,
24642490
link == BOND_LINK_UP ? "UP" : "DOWN");
@@ -2499,7 +2525,7 @@ int bond_3ad_set_carrier(struct bonding *bond)
24992525
active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
25002526
if (active) {
25012527
/* are enough slaves available to consider link up? */
2502-
if (active->num_of_ports < bond->params.min_links) {
2528+
if (__agg_active_ports(active) < bond->params.min_links) {
25032529
if (netif_carrier_ok(bond->dev)) {
25042530
netif_carrier_off(bond->dev);
25052531
goto out;

0 commit comments

Comments
 (0)