Skip to content

Commit 42510df

Browse files
manishc88Paolo Abeni
authored andcommitted
qed/qede: Fix scheduling while atomic
Statistics read through bond interface via sysfs causes below bug and traces as it triggers the bonding module to collect the slave device statistics while holding the spinlock, beneath that qede->qed driver statistics flow gets scheduled out due to usleep_range() used in PTT acquire logic [ 3673.988874] Hardware name: HPE ProLiant DL365 Gen10 Plus/ProLiant DL365 Gen10 Plus, BIOS A42 10/29/2021 [ 3673.988878] Call Trace: [ 3673.988891] dump_stack_lvl+0x34/0x44 [ 3673.988908] __schedule_bug.cold+0x47/0x53 [ 3673.988918] __schedule+0x3fb/0x560 [ 3673.988929] schedule+0x43/0xb0 [ 3673.988932] schedule_hrtimeout_range_clock+0xbf/0x1b0 [ 3673.988937] ? __hrtimer_init+0xc0/0xc0 [ 3673.988950] usleep_range+0x5e/0x80 [ 3673.988955] qed_ptt_acquire+0x2b/0xd0 [qed] [ 3673.988981] _qed_get_vport_stats+0x141/0x240 [qed] [ 3673.989001] qed_get_vport_stats+0x18/0x80 [qed] [ 3673.989016] qede_fill_by_demand_stats+0x37/0x400 [qede] [ 3673.989028] qede_get_stats64+0x19/0xe0 [qede] [ 3673.989034] dev_get_stats+0x5c/0xc0 [ 3673.989045] netstat_show.constprop.0+0x52/0xb0 [ 3673.989055] dev_attr_show+0x19/0x40 [ 3673.989065] sysfs_kf_seq_show+0x9b/0xf0 [ 3673.989076] seq_read_iter+0x120/0x4b0 [ 3673.989087] new_sync_read+0x118/0x1a0 [ 3673.989095] vfs_read+0xf3/0x180 [ 3673.989099] ksys_read+0x5f/0xe0 [ 3673.989102] do_syscall_64+0x3b/0x90 [ 3673.989109] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3673.989115] RIP: 0033:0x7f8467d0b082 [ 3673.989119] Code: c0 e9 b2 fe ff ff 50 48 8d 3d ca 05 08 00 e8 35 e7 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 3673.989121] RSP: 002b:00007ffffb21fd08 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 3673.989127] RAX: ffffffffffffffda RBX: 000000000100eca0 RCX: 00007f8467d0b082 [ 3673.989128] RDX: 00000000000003ff RSI: 00007ffffb21fdc0 RDI: 0000000000000003 [ 3673.989130] RBP: 00007f8467b96028 R08: 0000000000000010 R09: 00007ffffb21ec00 [ 3673.989132] R10: 00007ffffb27b170 R11: 0000000000000246 R12: 00000000000000f0 [ 3673.989134] R13: 0000000000000003 R14: 00007f8467b92000 R15: 0000000000045a05 [ 3673.989139] CPU: 30 PID: 285188 Comm: read_all Kdump: loaded Tainted: G W OE Fix this by collecting the statistics asynchronously from a periodic delayed work scheduled at default stats coalescing interval and return the recent copy of statisitcs from .ndo_get_stats64(), also add ability to configure/retrieve stats coalescing interval using below commands - ethtool -C ethx stats-block-usecs <val> ethtool -c ethx Fixes: 133fac0 ("qede: Add basic ethtool support") Cc: Sudarsana Kalluru <skalluru@marvell.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: Manish Chopra <manishc@marvell.com> Link: https://lore.kernel.org/r/20230605112600.48238-1-manishc@marvell.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
1 parent fb92817 commit 42510df

File tree

4 files changed

+60
-4
lines changed

4 files changed

+60
-4
lines changed

drivers/net/ethernet/qlogic/qed/qed_l2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1903,7 +1903,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
19031903
{
19041904
u32 i;
19051905

1906-
if (!cdev) {
1906+
if (!cdev || cdev->recov_in_prog) {
19071907
memset(stats, 0, sizeof(*stats));
19081908
return;
19091909
}

drivers/net/ethernet/qlogic/qede/qede.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,10 @@ struct qede_dev {
269269
#define QEDE_ERR_WARN 3
270270

271271
struct qede_dump_info dump_info;
272+
struct delayed_work periodic_task;
273+
unsigned long stats_coal_ticks;
274+
u32 stats_coal_usecs;
275+
spinlock_t stats_lock; /* lock for vport stats access */
272276
};
273277

274278
enum QEDE_STATE {

drivers/net/ethernet/qlogic/qede/qede_ethtool.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,8 @@ static void qede_get_ethtool_stats(struct net_device *dev,
429429
}
430430
}
431431

432+
spin_lock(&edev->stats_lock);
433+
432434
for (i = 0; i < QEDE_NUM_STATS; i++) {
433435
if (qede_is_irrelevant_stat(edev, i))
434436
continue;
@@ -438,6 +440,8 @@ static void qede_get_ethtool_stats(struct net_device *dev,
438440
buf++;
439441
}
440442

443+
spin_unlock(&edev->stats_lock);
444+
441445
__qede_unlock(edev);
442446
}
443447

@@ -829,6 +833,7 @@ static int qede_get_coalesce(struct net_device *dev,
829833

830834
coal->rx_coalesce_usecs = rx_coal;
831835
coal->tx_coalesce_usecs = tx_coal;
836+
coal->stats_block_coalesce_usecs = edev->stats_coal_usecs;
832837

833838
return rc;
834839
}
@@ -842,6 +847,19 @@ int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
842847
int i, rc = 0;
843848
u16 rxc, txc;
844849

850+
if (edev->stats_coal_usecs != coal->stats_block_coalesce_usecs) {
851+
edev->stats_coal_usecs = coal->stats_block_coalesce_usecs;
852+
if (edev->stats_coal_usecs) {
853+
edev->stats_coal_ticks = usecs_to_jiffies(edev->stats_coal_usecs);
854+
schedule_delayed_work(&edev->periodic_task, 0);
855+
856+
DP_INFO(edev, "Configured stats coal ticks=%lu jiffies\n",
857+
edev->stats_coal_ticks);
858+
} else {
859+
cancel_delayed_work_sync(&edev->periodic_task);
860+
}
861+
}
862+
845863
if (!netif_running(dev)) {
846864
DP_INFO(edev, "Interface is down\n");
847865
return -EINVAL;
@@ -2252,7 +2270,8 @@ static int qede_get_per_coalesce(struct net_device *dev,
22522270
}
22532271

22542272
static const struct ethtool_ops qede_ethtool_ops = {
2255-
.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
2273+
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
2274+
ETHTOOL_COALESCE_STATS_BLOCK_USECS,
22562275
.get_link_ksettings = qede_get_link_ksettings,
22572276
.set_link_ksettings = qede_set_link_ksettings,
22582277
.get_drvinfo = qede_get_drvinfo,
@@ -2303,7 +2322,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
23032322
};
23042323

23052324
static const struct ethtool_ops qede_vf_ethtool_ops = {
2306-
.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
2325+
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
2326+
ETHTOOL_COALESCE_STATS_BLOCK_USECS,
23072327
.get_link_ksettings = qede_get_link_ksettings,
23082328
.get_drvinfo = qede_get_drvinfo,
23092329
.get_msglevel = qede_get_msglevel,

drivers/net/ethernet/qlogic/qede/qede_main.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
307307

308308
edev->ops->get_vport_stats(edev->cdev, &stats);
309309

310+
spin_lock(&edev->stats_lock);
311+
310312
p_common->no_buff_discards = stats.common.no_buff_discards;
311313
p_common->packet_too_big_discard = stats.common.packet_too_big_discard;
312314
p_common->ttl0_discard = stats.common.ttl0_discard;
@@ -404,6 +406,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
404406
p_ah->tx_1519_to_max_byte_packets =
405407
stats.ah.tx_1519_to_max_byte_packets;
406408
}
409+
410+
spin_unlock(&edev->stats_lock);
407411
}
408412

409413
static void qede_get_stats64(struct net_device *dev,
@@ -412,9 +416,10 @@ static void qede_get_stats64(struct net_device *dev,
412416
struct qede_dev *edev = netdev_priv(dev);
413417
struct qede_stats_common *p_common;
414418

415-
qede_fill_by_demand_stats(edev);
416419
p_common = &edev->stats.common;
417420

421+
spin_lock(&edev->stats_lock);
422+
418423
stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts +
419424
p_common->rx_bcast_pkts;
420425
stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts +
@@ -434,6 +439,8 @@ static void qede_get_stats64(struct net_device *dev,
434439
stats->collisions = edev->stats.bb.tx_total_collisions;
435440
stats->rx_crc_errors = p_common->rx_crc_errors;
436441
stats->rx_frame_errors = p_common->rx_align_errors;
442+
443+
spin_unlock(&edev->stats_lock);
437444
}
438445

439446
#ifdef CONFIG_QED_SRIOV
@@ -1063,6 +1070,23 @@ static void qede_unlock(struct qede_dev *edev)
10631070
rtnl_unlock();
10641071
}
10651072

1073+
static void qede_periodic_task(struct work_struct *work)
1074+
{
1075+
struct qede_dev *edev = container_of(work, struct qede_dev,
1076+
periodic_task.work);
1077+
1078+
qede_fill_by_demand_stats(edev);
1079+
schedule_delayed_work(&edev->periodic_task, edev->stats_coal_ticks);
1080+
}
1081+
1082+
static void qede_init_periodic_task(struct qede_dev *edev)
1083+
{
1084+
INIT_DELAYED_WORK(&edev->periodic_task, qede_periodic_task);
1085+
spin_lock_init(&edev->stats_lock);
1086+
edev->stats_coal_usecs = USEC_PER_SEC;
1087+
edev->stats_coal_ticks = usecs_to_jiffies(USEC_PER_SEC);
1088+
}
1089+
10661090
static void qede_sp_task(struct work_struct *work)
10671091
{
10681092
struct qede_dev *edev = container_of(work, struct qede_dev,
@@ -1082,6 +1106,7 @@ static void qede_sp_task(struct work_struct *work)
10821106
*/
10831107

10841108
if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
1109+
cancel_delayed_work_sync(&edev->periodic_task);
10851110
#ifdef CONFIG_QED_SRIOV
10861111
/* SRIOV must be disabled outside the lock to avoid a deadlock.
10871112
* The recovery of the active VFs is currently not supported.
@@ -1272,6 +1297,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
12721297
*/
12731298
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
12741299
mutex_init(&edev->qede_lock);
1300+
qede_init_periodic_task(edev);
12751301

12761302
rc = register_netdev(edev->ndev);
12771303
if (rc) {
@@ -1296,6 +1322,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
12961322
edev->rx_copybreak = QEDE_RX_HDR_SIZE;
12971323

12981324
qede_log_probe(edev);
1325+
1326+
/* retain user config (for example - after recovery) */
1327+
if (edev->stats_coal_usecs)
1328+
schedule_delayed_work(&edev->periodic_task, 0);
1329+
12991330
return 0;
13001331

13011332
err4:
@@ -1364,6 +1395,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
13641395
unregister_netdev(ndev);
13651396

13661397
cancel_delayed_work_sync(&edev->sp_task);
1398+
cancel_delayed_work_sync(&edev->periodic_task);
13671399

13681400
edev->ops->common->set_power_state(cdev, PCI_D0);
13691401

0 commit comments

Comments
 (0)