Skip to content

Commit 1178dab

Browse files
etantilovNipaLocal
authored andcommitted
idpf: detach and close netdevs while handling a reset
Protect the reset path from callbacks by setting the netdevs to detached state and close any netdevs in UP state until the reset handling has completed. During a reset, the driver will de-allocate resources for the vport, and there is no guarantee that those will recover, which is why the existing vport_ctrl_lock does not provide sufficient protection. idpf_detach_and_close() is called right before reset handling. If the reset handling succeeds, the netdevs state is recovered via call to idpf_attach_and_open(). If the reset handling fails the netdevs remain down. The detach/down calls are protected with RTNL lock to avoid racing with callbacks. On the recovery side the attach can be done without holding the RTNL lock as there are no callbacks expected at that point, due to detach/close always being done first in that flow. The previous logic restoring the netdevs state based on the IDPF_VPORT_UP_REQUESTED flag in the init task is not needed anymore, hence the removal of idpf_set_vport_state(). The IDPF_VPORT_UP_REQUESTED is still being used to restore the state of the netdevs following the reset, but has no use outside of the reset handling flow. idpf_init_hard_reset() is converted to void, since it was used as such and there is no error handling being done based on its return value. Before this change, invoking hard and soft resets simultaneously will cause the driver to lose the vport state: ip -br a <inf> UP echo 1 > /sys/class/net/ens801f0/device/reset& \ ethtool -L ens801f0 combined 8 ip -br a <inf> DOWN ip link set <inf> up ip -br a <inf> DOWN Also in case of a failure in the reset path, the netdev is left exposed to external callbacks, while vport resources are not initialized, leading to a crash on subsequent ifup/down: [408471.398966] idpf 0000:83:00.0: HW reset detected [408471.411744] idpf 0000:83:00.0: Device HW Reset initiated [408472.277901] idpf 0000:83:00.0: The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x2 [408508.125551] BUG: kernel NULL pointer dereference, address: 0000000000000078 [408508.126112] #PF: supervisor read access in kernel mode [408508.126687] #PF: error_code(0x0000) - not-present page [408508.127256] PGD 2aae2f067 P4D 0 [408508.127824] Oops: Oops: 0000 [#1] SMP NOPTI ... [408508.130871] RIP: 0010:idpf_stop+0x39/0x70 [idpf] ... [408508.139193] Call Trace: [408508.139637] <TASK> [408508.140077] __dev_close_many+0xbb/0x260 [408508.140533] __dev_change_flags+0x1cf/0x280 [408508.140987] netif_change_flags+0x26/0x70 [408508.141434] dev_change_flags+0x3d/0xb0 [408508.141878] devinet_ioctl+0x460/0x890 [408508.142321] inet_ioctl+0x18e/0x1d0 [408508.142762] ? _copy_to_user+0x22/0x70 [408508.143207] sock_do_ioctl+0x3d/0xe0 [408508.143652] sock_ioctl+0x10e/0x330 [408508.144091] ? find_held_lock+0x2b/0x80 [408508.144537] __x64_sys_ioctl+0x96/0xe0 [408508.144979] do_syscall_64+0x79/0x3d0 [408508.145415] entry_SYSCALL_64_after_hwframe+0x76/0x7e [408508.145860] RIP: 0033:0x7f3e0bb4caff Fixes: 0fe4546 ("idpf: add create vport and netdev configuration") Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com> Reviewed-by: Madhu Chittim <madhu.chittim@intel.com> Tested-by: Samuel Salin <Samuel.salin@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Signed-off-by: NipaLocal <nipa@local>
1 parent 043dfe7 commit 1178dab

File tree

1 file changed

+72
-49
lines changed

1 file changed

+72
-49
lines changed

drivers/net/ethernet/intel/idpf/idpf_lib.c

Lines changed: 72 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,65 @@ static int idpf_init_mac_addr(struct idpf_vport *vport,
729729
return 0;
730730
}
731731

732+
static void idpf_detach_and_close(struct idpf_adapter *adapter)
733+
{
734+
int max_vports = adapter->max_vports;
735+
736+
for (int i = 0; i < max_vports; i++) {
737+
struct net_device *netdev = adapter->netdevs[i];
738+
739+
/* If the interface is in detached state, that means the
740+
* previous reset was not handled successfully for this
741+
* vport.
742+
*/
743+
if (!netif_device_present(netdev))
744+
continue;
745+
746+
/* Hold RTNL to protect racing with callbacks */
747+
rtnl_lock();
748+
netif_device_detach(netdev);
749+
if (netif_running(netdev)) {
750+
set_bit(IDPF_VPORT_UP_REQUESTED,
751+
adapter->vport_config[i]->flags);
752+
dev_close(netdev);
753+
}
754+
rtnl_unlock();
755+
}
756+
}
757+
758+
static void idpf_attach_and_open(struct idpf_adapter *adapter)
759+
{
760+
int max_vports = adapter->max_vports;
761+
762+
for (int i = 0; i < max_vports; i++) {
763+
struct idpf_vport *vport = adapter->vports[i];
764+
struct idpf_vport_config *vport_config;
765+
struct net_device *netdev;
766+
767+
/* In case of a critical error in the init task, the vport
768+
* will be freed. Only continue to restore the netdevs
769+
* if the vport is allocated.
770+
*/
771+
if (!vport)
772+
continue;
773+
774+
/* No need for RTNL on attach as this function is called
775+
* following detach and dev_close(). We do take RTNL for
776+
* dev_open() below as it can race with external callbacks
777+
* following the call to netif_device_attach().
778+
*/
779+
netdev = adapter->netdevs[i];
780+
netif_device_attach(netdev);
781+
vport_config = adapter->vport_config[vport->idx];
782+
if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED,
783+
vport_config->flags)) {
784+
rtnl_lock();
785+
dev_open(netdev, NULL);
786+
rtnl_unlock();
787+
}
788+
}
789+
}
790+
732791
/**
733792
* idpf_cfg_netdev - Allocate, configure and register a netdev
734793
* @vport: main vport structure
@@ -1041,10 +1100,11 @@ static void idpf_vport_dealloc(struct idpf_vport *vport)
10411100
idpf_idc_deinit_vport_aux_device(vport->vdev_info);
10421101

10431102
idpf_deinit_mac_addr(vport);
1044-
idpf_vport_stop(vport, true);
10451103

1046-
if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
1104+
if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) {
1105+
idpf_vport_stop(vport, true);
10471106
idpf_decfg_netdev(vport);
1107+
}
10481108
if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
10491109
idpf_del_all_mac_filters(vport);
10501110

@@ -1544,7 +1604,6 @@ void idpf_init_task(struct work_struct *work)
15441604
struct idpf_vport_config *vport_config;
15451605
struct idpf_vport_max_q max_q;
15461606
struct idpf_adapter *adapter;
1547-
struct idpf_netdev_priv *np;
15481607
struct idpf_vport *vport;
15491608
u16 num_default_vports;
15501609
struct pci_dev *pdev;
@@ -1600,12 +1659,6 @@ void idpf_init_task(struct work_struct *work)
16001659
if (idpf_cfg_netdev(vport))
16011660
goto unwind_vports;
16021661

1603-
/* Once state is put into DOWN, driver is ready for dev_open */
1604-
np = netdev_priv(vport->netdev);
1605-
clear_bit(IDPF_VPORT_UP, np->state);
1606-
if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags))
1607-
idpf_vport_open(vport, true);
1608-
16091662
/* Spawn and return 'idpf_init_task' work queue until all the
16101663
* default vports are created
16111664
*/
@@ -1781,27 +1834,6 @@ static int idpf_check_reset_complete(struct idpf_hw *hw,
17811834
return -EBUSY;
17821835
}
17831836

1784-
/**
1785-
* idpf_set_vport_state - Set the vport state to be after the reset
1786-
* @adapter: Driver specific private structure
1787-
*/
1788-
static void idpf_set_vport_state(struct idpf_adapter *adapter)
1789-
{
1790-
u16 i;
1791-
1792-
for (i = 0; i < adapter->max_vports; i++) {
1793-
struct idpf_netdev_priv *np;
1794-
1795-
if (!adapter->netdevs[i])
1796-
continue;
1797-
1798-
np = netdev_priv(adapter->netdevs[i]);
1799-
if (test_bit(IDPF_VPORT_UP, np->state))
1800-
set_bit(IDPF_VPORT_UP_REQUESTED,
1801-
adapter->vport_config[i]->flags);
1802-
}
1803-
}
1804-
18051837
/**
18061838
* idpf_init_hard_reset - Initiate a hardware reset
18071839
* @adapter: Driver specific private structure
@@ -1810,28 +1842,17 @@ static void idpf_set_vport_state(struct idpf_adapter *adapter)
18101842
* reallocate. Also reinitialize the mailbox. Return 0 on success,
18111843
* negative on failure.
18121844
*/
1813-
static int idpf_init_hard_reset(struct idpf_adapter *adapter)
1845+
static void idpf_init_hard_reset(struct idpf_adapter *adapter)
18141846
{
18151847
struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops;
18161848
struct device *dev = &adapter->pdev->dev;
1817-
struct net_device *netdev;
18181849
int err;
1819-
u16 i;
18201850

1851+
idpf_detach_and_close(adapter);
18211852
mutex_lock(&adapter->vport_ctrl_lock);
18221853

18231854
dev_info(dev, "Device HW Reset initiated\n");
18241855

1825-
/* Avoid TX hangs on reset */
1826-
for (i = 0; i < adapter->max_vports; i++) {
1827-
netdev = adapter->netdevs[i];
1828-
if (!netdev)
1829-
continue;
1830-
1831-
netif_carrier_off(netdev);
1832-
netif_tx_disable(netdev);
1833-
}
1834-
18351856
/* Prepare for reset */
18361857
if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
18371858
reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD);
@@ -1840,7 +1861,6 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
18401861

18411862
idpf_idc_issue_reset_event(adapter->cdev_info);
18421863

1843-
idpf_set_vport_state(adapter);
18441864
idpf_vc_core_deinit(adapter);
18451865
if (!is_reset)
18461866
reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET);
@@ -1887,11 +1907,14 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
18871907
unlock_mutex:
18881908
mutex_unlock(&adapter->vport_ctrl_lock);
18891909

1890-
/* Wait until all vports are created to init RDMA CORE AUX */
1891-
if (!err)
1892-
err = idpf_idc_init(adapter);
1893-
1894-
return err;
1910+
/* Attempt to restore netdevs and initialize RDMA CORE AUX device,
1911+
* provided vc_core_init succeeded. It is still possible that
1912+
* vports are not allocated at this point if the init task failed.
1913+
*/
1914+
if (!err) {
1915+
idpf_attach_and_open(adapter);
1916+
idpf_idc_init(adapter);
1917+
}
18951918
}
18961919

18971920
/**

0 commit comments

Comments
 (0)