Skip to content

Commit 7fa6f34

Browse files
Yuval Mintz authored and davem330 committed
bnx2x: AER revised
Revised bnx2x implementation of PCI Express Advanced Error Recovery: stop and free driver resources according to the AER flow (instead of the currently implemented 'hope-for-the-best' release approach), and do not make any assumptions about the HW state after slot reset.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 47a5247 commit 7fa6f34

File tree

4 files changed

+130
-30
lines changed

4 files changed

+130
-30
lines changed

drivers/net/ethernet/broadcom/bnx2x/bnx2x.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -1226,10 +1226,11 @@ enum {
12261226

12271227

12281228
struct bnx2x_prev_path_list {
1229+
struct list_head list;
12291230
u8 bus;
12301231
u8 slot;
12311232
u8 path;
1232-
struct list_head list;
1233+
u8 aer;
12331234
u8 undi;
12341235
};
12351236

drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c

+2-2
Original file line number | Diff line number | Diff line change
@@ -2010,7 +2010,7 @@ static int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
20102010
* Cleans the objects that have internal lists without sending
20112011
* ramrods. Should be run when interrupts are disabled.
20122012
*/
2013-
static void bnx2x_squeeze_objects(struct bnx2x *bp)
2013+
void bnx2x_squeeze_objects(struct bnx2x *bp)
20142014
{
20152015
int rc;
20162016
unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
@@ -2775,7 +2775,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
27752775
#endif /* ! BNX2X_STOP_ON_ERROR */
27762776
}
27772777

2778-
static int bnx2x_drain_tx_queues(struct bnx2x *bp)
2778+
int bnx2x_drain_tx_queues(struct bnx2x *bp)
27792779
{
27802780
u8 rc = 0, cos, i;
27812781

drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h

+4
Original file line number | Diff line number | Diff line change
@@ -1402,4 +1402,8 @@ static inline bool bnx2x_is_valid_ether_addr(struct bnx2x *bp, u8 *addr)
14021402
*
14031403
*/
14041404
void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len);
1405+
1406+
int bnx2x_drain_tx_queues(struct bnx2x *bp);
1407+
void bnx2x_squeeze_objects(struct bnx2x *bp);
1408+
14051409
#endif /* BNX2X_CMN_H */

drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

+122-27
Original file line number | Diff line number | Diff line change
@@ -9718,6 +9718,31 @@ static struct bnx2x_prev_path_list *
97189718
return NULL;
97199719
}
97209720

9721+
/*
 * bnx2x_prev_path_mark_eeh - flag this device's previous-path entry as
 * having gone through AER/EEH.
 *
 * Takes bnx2x_prev_sem interruptibly, looks up the entry for @bp with
 * bnx2x_prev_path_get_entry() and, when one is found, sets its aer field
 * to 1. The semaphore is released before returning.
 *
 * Returns the non-zero value from down_interruptible() when the semaphore
 * could not be taken, and 0 otherwise.
 *
 * NOTE(review): when no entry is found the condition is only logged; rc is
 * still 0 from the successful down_interruptible(), so the return value does
 * not distinguish "marked" from "no entry". Confirm this is intended.
 */
static int bnx2x_prev_path_mark_eeh(struct bnx2x *bp)
9722+
{
9723+
struct bnx2x_prev_path_list *tmp_list;
9724+
int rc;
9725+
9726+
rc = down_interruptible(&bnx2x_prev_sem);
9727+
if (rc) {
9728+
BNX2X_ERR("Received %d when tried to take lock\n", rc);
9729+
return rc;
9730+
}
9731+
9732+
tmp_list = bnx2x_prev_path_get_entry(bp);
9733+
if (tmp_list) {
9734+
tmp_list->aer = 1;
9735+
rc = 0;
9736+
} else {
9737+
BNX2X_ERR("path %d: Entry does not exist for eeh; Flow occurs before initial insmod is over ?\n",
9738+
BP_PATH(bp));
9739+
}
9740+
9741+
up(&bnx2x_prev_sem);
9742+
9743+
return rc;
9744+
}
9745+
97219746
static bool bnx2x_prev_is_path_marked(struct bnx2x *bp)
97229747
{
97239748
struct bnx2x_prev_path_list *tmp_list;
@@ -9726,14 +9751,15 @@ static bool bnx2x_prev_is_path_marked(struct bnx2x *bp)
97269751
if (down_trylock(&bnx2x_prev_sem))
97279752
return false;
97289753

9729-
list_for_each_entry(tmp_list, &bnx2x_prev_list, list) {
9730-
if (PCI_SLOT(bp->pdev->devfn) == tmp_list->slot &&
9731-
bp->pdev->bus->number == tmp_list->bus &&
9732-
BP_PATH(bp) == tmp_list->path) {
9754+
tmp_list = bnx2x_prev_path_get_entry(bp);
9755+
if (tmp_list) {
9756+
if (tmp_list->aer) {
9757+
DP(NETIF_MSG_HW, "Path %d was marked by AER\n",
9758+
BP_PATH(bp));
9759+
} else {
97339760
rc = true;
97349761
BNX2X_DEV_INFO("Path %d was already cleaned from previous drivers\n",
97359762
BP_PATH(bp));
9736-
break;
97379763
}
97389764
}
97399765

@@ -9747,6 +9773,28 @@ static int bnx2x_prev_mark_path(struct bnx2x *bp, bool after_undi)
97479773
struct bnx2x_prev_path_list *tmp_list;
97489774
int rc;
97499775

9776+
rc = down_interruptible(&bnx2x_prev_sem);
9777+
if (rc) {
9778+
BNX2X_ERR("Received %d when tried to take lock\n", rc);
9779+
return rc;
9780+
}
9781+
9782+
/* Check whether the entry for this path already exists */
9783+
tmp_list = bnx2x_prev_path_get_entry(bp);
9784+
if (tmp_list) {
9785+
if (!tmp_list->aer) {
9786+
BNX2X_ERR("Re-Marking the path.\n");
9787+
} else {
9788+
DP(NETIF_MSG_HW, "Removing AER indication from path %d\n",
9789+
BP_PATH(bp));
9790+
tmp_list->aer = 0;
9791+
}
9792+
up(&bnx2x_prev_sem);
9793+
return 0;
9794+
}
9795+
up(&bnx2x_prev_sem);
9796+
9797+
/* Create an entry for this path and add it */
97509798
tmp_list = kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
97519799
if (!tmp_list) {
97529800
BNX2X_ERR("Failed to allocate 'bnx2x_prev_path_list'\n");
@@ -9756,15 +9804,16 @@ static int bnx2x_prev_mark_path(struct bnx2x *bp, bool after_undi)
97569804
tmp_list->bus = bp->pdev->bus->number;
97579805
tmp_list->slot = PCI_SLOT(bp->pdev->devfn);
97589806
tmp_list->path = BP_PATH(bp);
9807+
tmp_list->aer = 0;
97599808
tmp_list->undi = after_undi ? (1 << BP_PORT(bp)) : 0;
97609809

97619810
rc = down_interruptible(&bnx2x_prev_sem);
97629811
if (rc) {
97639812
BNX2X_ERR("Received %d when tried to take lock\n", rc);
97649813
kfree(tmp_list);
97659814
} else {
9766-
BNX2X_DEV_INFO("Marked path [%d] - finished previous unload\n",
9767-
BP_PATH(bp));
9815+
DP(NETIF_MSG_HW, "Marked path [%d] - finished previous unload\n",
9816+
BP_PATH(bp));
97689817
list_add(&tmp_list->list, &bnx2x_prev_list);
97699818
up(&bnx2x_prev_sem);
97709819
}
@@ -10003,6 +10052,7 @@ static int bnx2x_prev_unload(struct bnx2x *bp)
1000310052
}
1000410053

1000510054
do {
10055+
int aer = 0;
1000610056
/* Lock MCP using an unload request */
1000710057
fw = bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0);
1000810058
if (!fw) {
@@ -10011,7 +10061,18 @@ static int bnx2x_prev_unload(struct bnx2x *bp)
1001110061
break;
1001210062
}
1001310063

10014-
if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON) {
10064+
rc = down_interruptible(&bnx2x_prev_sem);
10065+
if (rc) {
10066+
BNX2X_ERR("Cannot check for AER; Received %d when tried to take lock\n",
10067+
rc);
10068+
} else {
10069+
/* If Path is marked by EEH, ignore unload status */
10070+
aer = !!(bnx2x_prev_path_get_entry(bp) &&
10071+
bnx2x_prev_path_get_entry(bp)->aer);
10072+
}
10073+
up(&bnx2x_prev_sem);
10074+
10075+
if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON || aer) {
1001510076
rc = bnx2x_prev_unload_common(bp);
1001610077
break;
1001710078
}
@@ -12632,9 +12693,7 @@ static void bnx2x_remove_one(struct pci_dev *pdev)
1263212693

1263312694
static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
1263412695
{
12635-
int i;
12636-
12637-
bp->state = BNX2X_STATE_ERROR;
12696+
bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
1263812697

1263912698
bp->rx_mode = BNX2X_RX_MODE_NONE;
1264012699

@@ -12643,29 +12702,21 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
1264312702

1264412703
/* Stop Tx */
1264512704
bnx2x_tx_disable(bp);
12646-
12647-
bnx2x_netif_stop(bp, 0);
1264812705
/* Delete all NAPI objects */
1264912706
bnx2x_del_all_napi(bp);
1265012707
if (CNIC_LOADED(bp))
1265112708
bnx2x_del_all_napi_cnic(bp);
12709+
netdev_reset_tc(bp->dev);
1265212710

1265312711
del_timer_sync(&bp->timer);
12712+
cancel_delayed_work(&bp->sp_task);
12713+
cancel_delayed_work(&bp->period_task);
1265412714

12655-
bnx2x_stats_handle(bp, STATS_EVENT_STOP);
12656-
12657-
/* Release IRQs */
12658-
bnx2x_free_irq(bp);
12659-
12660-
/* Free SKBs, SGEs, TPA pool and driver internals */
12661-
bnx2x_free_skbs(bp);
12662-
12663-
for_each_rx_queue(bp, i)
12664-
bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
12665-
12666-
bnx2x_free_mem(bp);
12715+
spin_lock_bh(&bp->stats_lock);
12716+
bp->stats_state = STATS_STATE_DISABLED;
12717+
spin_unlock_bh(&bp->stats_lock);
1266712718

12668-
bp->state = BNX2X_STATE_CLOSED;
12719+
bnx2x_save_statistics(bp);
1266912720

1267012721
netif_carrier_off(bp->dev);
1267112722

@@ -12701,6 +12752,8 @@ static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev,
1270112752

1270212753
rtnl_lock();
1270312754

12755+
BNX2X_ERR("IO error detected\n");
12756+
1270412757
netif_device_detach(dev);
1270512758

1270612759
if (state == pci_channel_io_perm_failure) {
@@ -12711,6 +12764,8 @@ static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev,
1271112764
if (netif_running(dev))
1271212765
bnx2x_eeh_nic_unload(bp);
1271312766

12767+
bnx2x_prev_path_mark_eeh(bp);
12768+
1271412769
pci_disable_device(pdev);
1271512770

1271612771
rtnl_unlock();
@@ -12729,9 +12784,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
1272912784
{
1273012785
struct net_device *dev = pci_get_drvdata(pdev);
1273112786
struct bnx2x *bp = netdev_priv(dev);
12787+
int i;
1273212788

1273312789
rtnl_lock();
12734-
12790+
BNX2X_ERR("IO slot reset initializing...\n");
1273512791
if (pci_enable_device(pdev)) {
1273612792
dev_err(&pdev->dev,
1273712793
"Cannot re-enable PCI device after reset\n");
@@ -12745,6 +12801,42 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
1274512801
if (netif_running(dev))
1274612802
bnx2x_set_power_state(bp, PCI_D0);
1274712803

12804+
if (netif_running(dev)) {
12805+
BNX2X_ERR("IO slot reset --> driver unload\n");
12806+
if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
12807+
u32 v;
12808+
12809+
v = SHMEM2_RD(bp,
12810+
drv_capabilities_flag[BP_FW_MB_IDX(bp)]);
12811+
SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)],
12812+
v & ~DRV_FLAGS_CAPABILITIES_LOADED_L2);
12813+
}
12814+
bnx2x_drain_tx_queues(bp);
12815+
bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
12816+
bnx2x_netif_stop(bp, 1);
12817+
bnx2x_free_irq(bp);
12818+
12819+
/* Report UNLOAD_DONE to MCP */
12820+
bnx2x_send_unload_done(bp, true);
12821+
12822+
bp->sp_state = 0;
12823+
bp->port.pmf = 0;
12824+
12825+
bnx2x_prev_unload(bp);
12826+
12827+
/* We should have reset the engine, so it's fair to
12828+
* assume the FW will no longer write to the bnx2x driver.
12829+
*/
12830+
bnx2x_squeeze_objects(bp);
12831+
bnx2x_free_skbs(bp);
12832+
for_each_rx_queue(bp, i)
12833+
bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
12834+
bnx2x_free_fp_mem(bp);
12835+
bnx2x_free_mem(bp);
12836+
12837+
bp->state = BNX2X_STATE_CLOSED;
12838+
}
12839+
1274812840
rtnl_unlock();
1274912841

1275012842
return PCI_ERS_RESULT_RECOVERED;
@@ -12771,6 +12863,9 @@ static void bnx2x_io_resume(struct pci_dev *pdev)
1277112863

1277212864
bnx2x_eeh_recover(bp);
1277312865

12866+
bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
12867+
DRV_MSG_SEQ_NUMBER_MASK;
12868+
1277412869
if (netif_running(dev))
1277512870
bnx2x_nic_load(bp, LOAD_NORMAL);
1277612871

0 commit comments

Comments
 (0)