lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <84adec63-0ccd-449c-babf-994d579f3677@gmail.com>
Date: Thu, 10 Apr 2025 15:09:13 +0700
From: Bui Quang Minh <minhquangbui99@...il.com>
To: "Michael S. Tsirkin" <mst@...hat.com>
Cc: Jason Wang <jasowang@...hat.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
 Andrew Lunn <andrew+netdev@...n.ch>, Eric Dumazet <edumazet@...gle.com>,
 Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
 Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>,
 Jesper Dangaard Brouer <hawk@...nel.org>,
 John Fastabend <john.fastabend@...il.com>,
 Eugenio Pérez <eperezma@...hat.com>,
 "David S . Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
 linux-kernel@...r.kernel.org, bpf@...r.kernel.org,
 virtualization@...ts.linux.dev
Subject: Re: [PATCH] virtio-net: hold netdev_lock when pausing rx

On 4/10/25 14:58, Michael S. Tsirkin wrote:
> On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>> napi_disable() on the receive queue's napi. In delayed refill_work, it
>> also calls napi_disable() on the receive queue's napi. When
>> napi_disable() is called on an already disabled napi, it will sleep in
>> napi_disable_locked while still holding the netdev_lock. As a result,
>> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
>> This leads to refill_work and the pause-then-resume tx are stuck
>> altogether.
>>
>> This scenario can be reproducible by binding a XDP socket to virtio-net
>> interface without setting up the fill ring. As a result, try_fill_recv
>> will fail until the fill ring is set up and refill_work is scheduled.
>>
>> This commit makes the pausing rx path hold the netdev_lock until
>> resuming, prevent any napi_disable() to be called on a temporarily
>> disabled napi.
>>
>> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
>> Signed-off-by: Bui Quang Minh <minhquangbui99@...il.com>
>> ---
>>   drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
>>   1 file changed, 47 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 7e4617216a4b..74bd1065c586 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
>>   }
>>
>>   static void virtnet_napi_do_enable(struct virtqueue *vq,
>> -                   struct napi_struct *napi)
>> +                   struct napi_struct *napi,
>> +                   bool netdev_locked)
>>   {
>> -    napi_enable(napi);
>> +    if (netdev_locked)
>> +        napi_enable_locked(napi);
>> +    else
>> +        napi_enable(napi);
>>
>>       /* If all buffers were filled by other side before we napi_enabled, we
>>        * won't get another interrupt, so process any outstanding packets now.
>> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
>> *vq,
>
>
>
> Your patch is line-wrapped, unfortunately. Here and elsewhere.
>
>
>
>
>>       local_bh_enable();
>>   }
>>
>> -static void virtnet_napi_enable(struct receive_queue *rq)
>> +static void virtnet_napi_enable(struct receive_queue *rq, bool
>> netdev_locked)
>>   {
>>       struct virtnet_info *vi = rq->vq->vdev->priv;
>>       int qidx = vq2rxq(rq->vq);
>>
>> -    virtnet_napi_do_enable(rq->vq, &rq->napi);
>> +    virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
>>       netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
>>   }
>>
>> -static void virtnet_napi_tx_enable(struct send_queue *sq)
>> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
>> netdev_locked)
>>   {
>>       struct virtnet_info *vi = sq->vq->vdev->priv;
>>       struct napi_struct *napi = &sq->napi;
>> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
>> *sq)
>>           return;
>>       }
>>
>> -    virtnet_napi_do_enable(sq->vq, napi);
>> +    virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
>>       netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
>>   }
>>
>> -static void virtnet_napi_tx_disable(struct send_queue *sq)
>> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
>> netdev_locked)
>>   {
>>       struct virtnet_info *vi = sq->vq->vdev->priv;
>>       struct napi_struct *napi = &sq->napi;
>> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
>> send_queue *sq)
>>
>>       if (napi->weight) {
>>           netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
>> -        napi_disable(napi);
>> +        if (netdev_locked)
>> +            napi_disable_locked(napi);
>> +        else
>> +            napi_disable(napi);
>>       }
>>   }
>>
>> -static void virtnet_napi_disable(struct receive_queue *rq)
>> +static void virtnet_napi_disable(struct receive_queue *rq, bool
>> netdev_locked)
>>   {
>>       struct virtnet_info *vi = rq->vq->vdev->priv;
>>       struct napi_struct *napi = &rq->napi;
>>       int qidx = vq2rxq(rq->vq);
>>
>>       netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
>> -    napi_disable(napi);
>> +    if (netdev_locked)
>> +        napi_disable_locked(napi);
>> +    else
>> +        napi_disable(napi);
>>   }
>>
>>   static void refill_work(struct work_struct *work)
>> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
>>            *     instance lock)
>>            *   - check netif_running() and return early to avoid a race
>>            */
>> -        napi_disable(&rq->napi);
>> +        netdev_lock(vi->dev);
>> +        napi_disable_locked(&rq->napi);
>>           still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
>
> This does mean netdev_lock is held potentially for a long while,
> while try_fill_recv and processing inside virtnet_napi_do_enable
> finish. Better ideas?
I prefer the first patch in this thread where we disable delayed refill 
and cancel all inflight refill_work before pausing rx.

Thanks,
Quang Minh.


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ