Skip to content

Commit

Permalink
Revert "virtio_net: Add a lock for per queue RX coalesce"
Browse files Browse the repository at this point in the history
This reverts commit 4d4ac2e.

When the following snippet is run, lockdep will report a deadlock[1].

  /* Acquire all queues dim_locks */
  for (i = 0; i < vi->max_queue_pairs; i++)
          mutex_lock(&vi->rq[i].dim_lock);

There's no deadlock here because the vq locks are always taken
in the same order, but lockdep can not figure it out, and we
can not make each lock a separate class because there can be more
than MAX_LOCKDEP_SUBCLASSES of vqs.

However, dropping the lock is harmless:
  1. If dim is enabled, modifications made by dim worker to coalescing
     params may cause the user's query results to be dirty data.
  2. In scenarios (a) and (b), a spurious dim worker is scheduled,
     but this can be handled correctly:
     (a)
       1. dim is on
       2. net_dim call schedules a worker
       3. dim is turning off
       4. The worker checks that dim is off and then exits after
          restoring dim's state.
       5. The worker will not be scheduled until the next time dim is on.

     (b)
       1. dim is on
       2. net_dim call schedules a worker
       3. The worker checks that dim is on and keeps going
       4. dim is turning off
       5. The worker successfully configure this parameter to the device.
       6. The worker will not be scheduled until the next time dim is on.

[1]
========================================================
WARNING: possible recursive locking detected
6.9.0-rc7+ torvalds#319 Not tainted
--------------------------------------------
ethtool/962 is trying to acquire lock:

but task is already holding lock:

other info that might help us debug this:
Possible unsafe locking scenario:

      CPU0
      ----
 lock(&vi->rq[i].dim_lock);
 lock(&vi->rq[i].dim_lock);

*** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by ethtool/962:
 #0: ffffffff82dbaab0 (cb_lock){++++}-{3:3}, at: genl_rcv+0x19/0x40
 #1: ffffffff82dad0a8 (rtnl_mutex){+.+.}-{3:3}, at:
				ethnl_default_set_doit+0xbe/0x1e0

stack backtrace:
CPU: 6 PID: 962 Comm: ethtool Not tainted 6.9.0-rc7+ torvalds#319
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
	   rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x79/0xb0
 check_deadlock+0x130/0x220
 __lock_acquire+0x861/0x990
 lock_acquire.part.0+0x72/0x1d0
 ? lock_acquire+0xf8/0x130
 __mutex_lock+0x71/0xd50
 virtnet_set_coalesce+0x151/0x190
 __ethnl_set_coalesce.isra.0+0x3f8/0x4d0
 ethnl_set_coalesce+0x34/0x90
 ethnl_default_set_doit+0xdd/0x1e0
 genl_family_rcv_msg_doit+0xdc/0x130
 genl_family_rcv_msg+0x154/0x230
 ? __pfx_ethnl_default_set_doit+0x10/0x10
 genl_rcv_msg+0x4b/0xa0
 ? __pfx_genl_rcv_msg+0x10/0x10
 netlink_rcv_skb+0x5a/0x110
 genl_rcv+0x28/0x40
 netlink_unicast+0x1af/0x280
 netlink_sendmsg+0x20e/0x460
 __sys_sendto+0x1fe/0x210
 ? find_held_lock+0x2b/0x80
 ? do_user_addr_fault+0x3a2/0x8a0
 ? __lock_release+0x5e/0x160
 ? do_user_addr_fault+0x3a2/0x8a0
 ? lock_release+0x72/0x140
 ? do_user_addr_fault+0x3a7/0x8a0
 __x64_sys_sendto+0x29/0x30
 do_syscall_64+0x78/0x180
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: 4d4ac2e ("virtio_net: Add a lock for per queue RX coalesce")
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: NipaLocal <nipa@local>
  • Loading branch information
Heng Qi authored and NipaLocal committed May 26, 2024
1 parent 9b25cc1 commit a5167e7
Showing 1 changed file with 12 additions and 41 deletions.
53 changes: 12 additions & 41 deletions drivers/net/virtio_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,6 @@ struct receive_queue {
/* Is dynamic interrupt moderation enabled? */
bool dim_enabled;

/* Used to protect dim_enabled and inter_coal */
struct mutex dim_lock;

/* Dynamic Interrupt Moderation */
struct dim dim;

Expand Down Expand Up @@ -2368,10 +2365,6 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
/* Out of packets? */
if (received < budget) {
napi_complete = virtqueue_napi_complete(napi, rq->vq, received);
/* Intentionally not taking dim_lock here. This may result in a
* spurious net_dim call. But if that happens virtnet_rx_dim_work
* will not act on the scheduled work.
*/
if (napi_complete && rq->dim_enabled)
virtnet_rx_dim_update(vi, rq);
}
Expand Down Expand Up @@ -3247,11 +3240,9 @@ static int virtnet_set_ringparam(struct net_device *dev,
return err;

/* The reason is same as the transmit virtqueue reset */
mutex_lock(&vi->rq[i].dim_lock);
err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i,
vi->intr_coal_rx.max_usecs,
vi->intr_coal_rx.max_packets);
mutex_unlock(&vi->rq[i].dim_lock);
if (err)
return err;
}
Expand Down Expand Up @@ -4257,7 +4248,6 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
struct scatterlist sgs_rx;
int ret = 0;
int i;

if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
Expand All @@ -4267,22 +4257,16 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
return -EINVAL;

/* Acquire all queues dim_locks */
for (i = 0; i < vi->max_queue_pairs; i++)
mutex_lock(&vi->rq[i].dim_lock);

if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
vi->rx_dim_enabled = true;
for (i = 0; i < vi->max_queue_pairs; i++)
vi->rq[i].dim_enabled = true;
goto unlock;
return 0;
}

coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
if (!coal_rx) {
ret = -ENOMEM;
goto unlock;
}
if (!coal_rx)
return -ENOMEM;

if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
vi->rx_dim_enabled = false;
Expand All @@ -4300,22 +4284,17 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,

if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
&sgs_rx)) {
ret = -EINVAL;
goto unlock;
}
&sgs_rx))
return -EINVAL;

vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
for (i = 0; i < vi->max_queue_pairs; i++) {
vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
}
unlock:
for (i = vi->max_queue_pairs - 1; i >= 0; i--)
mutex_unlock(&vi->rq[i].dim_lock);

return ret;
return 0;
}

static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
Expand All @@ -4339,24 +4318,19 @@ static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
u16 queue)
{
bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
bool cur_rx_dim = vi->rq[queue].dim_enabled;
u32 max_usecs, max_packets;
bool cur_rx_dim;
int err;

mutex_lock(&vi->rq[queue].dim_lock);
cur_rx_dim = vi->rq[queue].dim_enabled;
max_usecs = vi->rq[queue].intr_coal.max_usecs;
max_packets = vi->rq[queue].intr_coal.max_packets;

if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs ||
ec->rx_max_coalesced_frames != max_packets)) {
mutex_unlock(&vi->rq[queue].dim_lock);
ec->rx_max_coalesced_frames != max_packets))
return -EINVAL;
}

if (rx_ctrl_dim_on && !cur_rx_dim) {
vi->rq[queue].dim_enabled = true;
mutex_unlock(&vi->rq[queue].dim_lock);
return 0;
}

Expand All @@ -4369,8 +4343,10 @@ static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi,
err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue,
ec->rx_coalesce_usecs,
ec->rx_max_coalesced_frames);
mutex_unlock(&vi->rq[queue].dim_lock);
return err;
if (err)
return err;

return 0;
}

static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
Expand Down Expand Up @@ -4404,7 +4380,6 @@ static void virtnet_rx_dim_work(struct work_struct *work)

qnum = rq - vi->rq;

mutex_lock(&rq->dim_lock);
if (!rq->dim_enabled)
goto out;

Expand All @@ -4420,7 +4395,6 @@ static void virtnet_rx_dim_work(struct work_struct *work)
}
out:
dim->state = DIM_START_MEASURE;
mutex_unlock(&rq->dim_lock);
}

static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
Expand Down Expand Up @@ -4558,13 +4532,11 @@ static int virtnet_get_per_queue_coalesce(struct net_device *dev,
return -EINVAL;

if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) {
mutex_lock(&vi->rq[queue].dim_lock);
ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs;
ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs;
ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets;
ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets;
ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled;
mutex_unlock(&vi->rq[queue].dim_lock);
} else {
ec->rx_max_coalesced_frames = 1;

Expand Down Expand Up @@ -5396,7 +5368,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)

u64_stats_init(&vi->rq[i].stats.syncp);
u64_stats_init(&vi->sq[i].stats.syncp);
mutex_init(&vi->rq[i].dim_lock);
}

return 0;
Expand Down

0 comments on commit a5167e7

Please sign in to comment.