[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aac402b4-d04c-4d7e-91c8-ab6c20c9a74d@gmail.com>
Date: Wed, 23 Apr 2025 22:20:41 +0700
From: Bui Quang Minh <minhquangbui99@...il.com>
To: Jakub Kicinski <kuba@...nel.org>
Cc: virtualization@...ts.linux.dev, "Michael S. Tsirkin" <mst@...hat.com>,
Jason Wang <jasowang@...hat.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Andrew Lunn <andrew+netdev@...n.ch>, Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Eugenio Pérez <eperezma@...hat.com>,
"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH v4 4/4] selftests: net: add a virtio_net deadlock selftest
On 4/23/25 08:41, Jakub Kicinski wrote:
> On Thu, 17 Apr 2025 14:28:06 +0700 Bui Quang Minh wrote:
>> The selftest reproduces the deadlock scenario when binding/unbinding XDP
>> program, XDP socket, rx ring resize on virtio_net interface.
>>
>> Signed-off-by: Bui Quang Minh <minhquangbui99@...il.com>
>> ---
>> .../testing/selftests/drivers/net/hw/Makefile | 1 +
>> .../selftests/drivers/net/hw/virtio_net.py | 65 +++++++++++++++++++
>> 2 files changed, 66 insertions(+)
>> create mode 100755 tools/testing/selftests/drivers/net/hw/virtio_net.py
>>
>> diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
>> index 07cddb19ba35..b5af7c1412bf 100644
>> --- a/tools/testing/selftests/drivers/net/hw/Makefile
>> +++ b/tools/testing/selftests/drivers/net/hw/Makefile
>> @@ -21,6 +21,7 @@ TEST_PROGS = \
>> rss_ctx.py \
>> rss_input_xfrm.py \
>> tso.py \
>> + virtio_net.py \
> Maybe xsk_reconfig.py ? Other drivers will benefit from this test, too,
> and that's a more descriptive name.
>
>> #
>>
>> TEST_FILES := \
>> diff --git a/tools/testing/selftests/drivers/net/hw/virtio_net.py b/tools/testing/selftests/drivers/net/hw/virtio_net.py
>> new file mode 100755
>> index 000000000000..7cad7ab98635
>> --- /dev/null
>> +++ b/tools/testing/selftests/drivers/net/hw/virtio_net.py
>> @@ -0,0 +1,65 @@
>> +#!/usr/bin/env python3
>> +# SPDX-License-Identifier: GPL-2.0
>> +
>> +# This is intended to be run on a virtio-net guest interface.
>> +# The test binds the XDP socket to the interface without setting
>> +# the fill ring to trigger delayed refill_work. This helps to
>> +# make it easier to reproduce the deadlock when XDP program,
>> +# XDP socket bind/unbind, rx ring resize race with refill_work on
>> +# the buggy kernel.
>> +#
>> +# The Qemu command to setup virtio-net
>> +# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
>> +# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
>> +
>> +from lib.py import ksft_exit, ksft_run
>> +from lib.py import KsftSkipEx, KsftFailEx
>> +from lib.py import NetDrvEnv
>> +from lib.py import bkg, ip, cmd, ethtool
>> +import re
>> +
>> +def _get_rx_ring_entries(cfg):
>> + output = ethtool(f"-g {cfg.ifname}").stdout
>> + values = re.findall(r'RX:\s+(\d+)', output)
> no need for the regexps, ethtool -g supports json formatting:
>
> output = ethtool(f"-g {cfg.ifname}", json=True)[0]
> return output["rx"]
>
> ?
>
>> + return int(values[1])
>> +
>> +def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
>> + # Probe for support
>> + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
>> + if xdp.ret == 255:
>> + raise KsftSkipEx('AF_XDP unsupported')
>> + elif xdp.ret > 0:
>> + raise KsftFailEx('unable to create AF_XDP socket')
>> +
>> + try:
>> + xsk_bkg = bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
>> + '{xdp_queue_id} -z', ksft_wait=3)
> This process will time out after 3 seconds but the test really
> shouldn't leave things running after it exits. Don't worry about
> the couple of seconds of execution time. Wrap each test in
>
> with bkg(f"... the exec info ... "):
> # test code here
>
> The bkg() class has an __exit__() handle once the test finishes
> and leaves the with block it will terminate.
I've tried moving the setup_xsk call into each test. However, I ran into
an issue: the XDP socket's destruction waits for an RCU grace period, as
I see the sock's SOCK_RCU_FREE flag is set. So if we start the next test
right away, setting up the XDP socket again can fail because the previous
XDP socket has not yet been unbound from the network interface's queue. I
can work around the issue by adding a sleep(1) after closing the socket
in xdp_helper:
diff --git a/tools/testing/selftests/net/lib/xdp_helper.c
b/tools/testing/selftests/net/lib/xdp_helper.c
index f21536ab95ba..e882bb22877f 100644
--- a/tools/testing/selftests/net/lib/xdp_helper.c
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -162,5 +162,6 @@ int main(int argc, char **argv)
*/
close(sock_fd);
+ sleep(1);
return 0;
}
Do you think it's enough or do you have a better suggestion here?
Thanks,
Quang Minh.
>
>> + return xsk_bkg
>> + except:
>> + raise KsftSkipEx('Failed to bind XDP socket in zerocopy. ' \
>> + 'Please consider adding iommu_platform=on ' \
>> + 'when setting up virtio-net-pci')
>> +
>> +def check_xdp_bind(cfg):
>> + ip(f"link set dev %s xdp obj %s sec xdp" %
>> + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
>> + ip(f"link set dev %s xdp off" % cfg.ifname)
>> +
>> +def check_rx_resize(cfg, queue_size = 128):
>> + rx_ring = _get_rx_ring_entries(cfg)
>> + ethtool(f"-G %s rx %d" % (cfg.ifname, queue_size))
>> + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
> Why guess the ring size? What if it's already 128? I usually do:
>
> rx_ring = _get_rx_ring_entries(cfg)
> ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring / 2))
> ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
>
> IOW flip between half or double and current.
>
>> +def main():
>> + with NetDrvEnv(__file__, nsim_test=False) as cfg:
>> + try:
>> + xsk_bkg = setup_xsk(cfg)
>> + except KsftSkipEx as e:
>> + print(f"WARN: xsk pool is not set up, err: {e}")
>> +
>> + ksft_run([check_xdp_bind, check_rx_resize],
>> + args=(cfg, ))
>> + ksft_exit()
>> +
>> +if __name__ == "__main__":
>> + main()
Powered by blists - more mailing lists