[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4d3a1478-b6fc-47a3-8d77-7eca6a973a06@gmail.com>
Date: Thu, 10 Apr 2025 14:05:57 +0700
From: Bui Quang Minh <minhquangbui99@...il.com>
To: Jason Wang <jasowang@...hat.com>
Cc: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
"Michael S . Tsirkin" <mst@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Eugenio Pérez <eperezma@...hat.com>,
"David S . Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org,
virtualization@...ts.linux.dev
Subject: [PATCH] virtio-net: hold netdev_lock when pausing rx
When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
napi_disable() on the receive queue's napi. In delayed refill_work, it
also calls napi_disable() on the receive queue's napi. When
napi_disable() is called on an already disabled napi, it will sleep in
napi_disable_locked while still holding the netdev_lock. As a result,
later napi_enable gets stuck too as it cannot acquire the netdev_lock.
This leads to refill_work and the pause-then-resume tx are stuck
altogether.
This scenario can be reproducible by binding a XDP socket to virtio-net
interface without setting up the fill ring. As a result, try_fill_recv
will fail until the fill ring is set up and refill_work is scheduled.
This commit makes the pausing rx path hold the netdev_lock until
resuming, prevent any napi_disable() to be called on a temporarily
disabled napi.
Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
Signed-off-by: Bui Quang Minh <minhquangbui99@...il.com>
---
drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
1 file changed, 47 insertions(+), 27 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..74bd1065c586 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
}
static void virtnet_napi_do_enable(struct virtqueue *vq,
- struct napi_struct *napi)
+ struct napi_struct *napi,
+ bool netdev_locked)
{
- napi_enable(napi);
+ if (netdev_locked)
+ napi_enable_locked(napi);
+ else
+ napi_enable(napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets
now.
@@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct
virtqueue *vq,
local_bh_enable();
}
-static void virtnet_napi_enable(struct receive_queue *rq)
+static void virtnet_napi_enable(struct receive_queue *rq, bool
netdev_locked)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
int qidx = vq2rxq(rq->vq);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
}
-static void virtnet_napi_tx_enable(struct send_queue *sq)
+static void virtnet_napi_tx_enable(struct send_queue *sq, bool
netdev_locked)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
@@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct
send_queue *sq)
return;
}
- virtnet_napi_do_enable(sq->vq, napi);
+ virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
}
-static void virtnet_napi_tx_disable(struct send_queue *sq)
+static void virtnet_napi_tx_disable(struct send_queue *sq, bool
netdev_locked)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
@@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
send_queue *sq)
if (napi->weight) {
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
- napi_disable(napi);
+ if (netdev_locked)
+ napi_disable_locked(napi);
+ else
+ napi_disable(napi);
}
}
-static void virtnet_napi_disable(struct receive_queue *rq)
+static void virtnet_napi_disable(struct receive_queue *rq, bool
netdev_locked)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
struct napi_struct *napi = &rq->napi;
int qidx = vq2rxq(rq->vq);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
- napi_disable(napi);
+ if (netdev_locked)
+ napi_disable_locked(napi);
+ else
+ napi_disable(napi);
}
static void refill_work(struct work_struct *work)
@@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
* instance lock)
* - check netif_running() and return early to avoid a race
*/
- napi_disable(&rq->napi);
+ netdev_lock(vi->dev);
+ napi_disable_locked(&rq->napi);
still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_napi_do_enable(rq->vq, &rq->napi, true);
+ netdev_unlock(vi->dev);
/* In theory, this can happen: if we don't get any buffers in
* we will *never* try to fill again.
@@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi,
int budget)
static void virtnet_disable_queue_pair(struct virtnet_info *vi, int
qp_index)
{
- virtnet_napi_tx_disable(&vi->sq[qp_index]);
- virtnet_napi_disable(&vi->rq[qp_index]);
+ virtnet_napi_tx_disable(&vi->sq[qp_index], false);
+ virtnet_napi_disable(&vi->rq[qp_index], false);
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
}
@@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct
virtnet_info *vi, int qp_index)
if (err < 0)
goto err_xdp_reg_mem_model;
- virtnet_napi_enable(&vi->rq[qp_index]);
- virtnet_napi_tx_enable(&vi->sq[qp_index]);
+ virtnet_napi_enable(&vi->rq[qp_index], false);
+ virtnet_napi_tx_enable(&vi->sq[qp_index], false);
return 0;
@@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info
*vi, struct receive_queue *rq)
bool running = netif_running(vi->dev);
if (running) {
- virtnet_napi_disable(rq);
+ netdev_lock(vi->dev);
+ virtnet_napi_disable(rq, true);
virtnet_cancel_dim(vi, &rq->dim);
}
}
@@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info
*vi, struct receive_queue *rq)
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
- if (running)
- virtnet_napi_enable(rq);
+ if (running) {
+ virtnet_napi_enable(rq, true);
+ netdev_unlock(vi->dev);
+ }
}
static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info
*vi, struct send_queue *sq)
qindex = sq - vi->sq;
if (running)
- virtnet_napi_tx_disable(sq);
+ virtnet_napi_tx_disable(sq, false);
txq = netdev_get_tx_queue(vi->dev, qindex);
@@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info
*vi, struct send_queue *sq)
__netif_tx_unlock_bh(txq);
if (running)
- virtnet_napi_tx_enable(sq);
+ virtnet_napi_tx_enable(sq, false);
}
static int virtnet_tx_resize(struct virtnet_info *vi, struct
send_queue *sq,
@@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device
*dev, struct bpf_prog *prog,
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
+ netdev_lock(dev);
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_disable(&vi->rq[i]);
- virtnet_napi_tx_disable(&vi->sq[i]);
+ virtnet_napi_disable(&vi->rq[i], true);
+ virtnet_napi_tx_disable(&vi->sq[i], true);
}
}
@@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device
*dev, struct bpf_prog *prog,
if (old_prog)
bpf_prog_put(old_prog);
if (netif_running(dev)) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], true);
+ virtnet_napi_tx_enable(&vi->sq[i], true);
}
}
+ if (netif_running(dev))
+ netdev_unlock(dev);
+
return 0;
err:
@@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device
*dev, struct bpf_prog *prog,
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], true);
+ virtnet_napi_tx_enable(&vi->sq[i], true);
}
+ netdev_unlock(dev);
}
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
--
2.43.0
Powered by blists - more mailing lists