[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z7UBJ_CIrvsSdmnt@LQ3V64L9R2>
Date: Tue, 18 Feb 2025 16:52:39 -0500
From: Joe Damato <jdamato@...tly.com>
To: Jakub Kicinski <kuba@...nel.org>
Cc: davem@...emloft.net, netdev@...r.kernel.org, edumazet@...gle.com,
pabeni@...hat.com, andrew+netdev@...n.ch, horms@...nel.org,
shuah@...nel.org, hawk@...nel.org, petrm@...dia.com,
willemdebruijn.kernel@...il.com
Subject: Re: [PATCH net-next 2/4] selftests: drv-net: add a way to wait for a
local process
On Tue, Feb 18, 2025 at 11:50:46AM -0800, Jakub Kicinski wrote:
> We use wait_port_listen() extensively to wait for a process
> we spawned to be ready. Not all processes will open listening
> sockets. Add a method of explicitly waiting for a child to
> be ready. Pass a FD to the spawned process and wait for it
> to write a message to us. FD number is passed via KSFT_READY_FD
> env variable.
>
> Make use of this method in the queues test to make it less flaky.
>
> Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> ---
> .../selftests/drivers/net/xdp_helper.c | 22 ++++++-
> tools/testing/selftests/drivers/net/queues.py | 46 ++++++---------
> tools/testing/selftests/net/lib/py/utils.py | 58 +++++++++++++++++--
> 3 files changed, 93 insertions(+), 33 deletions(-)
>
> diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c
> index cf06a88b830b..8f77da4f798f 100644
> --- a/tools/testing/selftests/drivers/net/xdp_helper.c
> +++ b/tools/testing/selftests/drivers/net/xdp_helper.c
> @@ -14,6 +14,25 @@
> #define UMEM_SZ (1U << 16)
> #define NUM_DESC (UMEM_SZ / 2048)
>
> +/* Move this to a common header when reused! */
> +static void ksft_ready(void)
> +{
> + const char msg[7] = "ready\n";
> + char *env_str;
> + int fd;
> +
> + env_str = getenv("KSFT_READY_FD");
> + if (!env_str)
> + return;
> +
> + fd = atoi(env_str);
> + if (!fd)
> + return;
> +
> + write(fd, msg, sizeof(msg));
> + close(fd);
> +}
> +
> /* this is a simple helper program that creates an XDP socket and does the
> * minimum necessary to get bind() to succeed.
> *
> @@ -85,8 +104,7 @@ int main(int argc, char **argv)
> return 1;
> }
>
> - /* give the parent program some data when the socket is ready*/
> - fprintf(stdout, "%d\n", sock_fd);
> + ksft_ready();
>
> /* parent program will write a byte to stdin when its ready for this
> * helper to exit
> diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
> index b6896a57a5fd..91e344d108ee 100755
> --- a/tools/testing/selftests/drivers/net/queues.py
> +++ b/tools/testing/selftests/drivers/net/queues.py
> @@ -5,13 +5,12 @@ from lib.py import ksft_disruptive, ksft_exit, ksft_run
> from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx
> from lib.py import EthtoolFamily, NetdevFamily, NlError
> from lib.py import NetDrvEnv
> -from lib.py import cmd, defer, ip
> +from lib.py import bkg, cmd, defer, ip
> import errno
> import glob
> import os
> import socket
> import struct
> -import subprocess
>
> def sys_get_queues(ifname, qtype='rx') -> int:
> folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
> @@ -25,37 +24,30 @@ import subprocess
> return None
>
> def check_xdp(cfg, nl, xdp_queue_id=0) -> None:
> - xdp = subprocess.Popen([cfg.rpath("xdp_helper"), f"{cfg.ifindex}", f"{xdp_queue_id}"],
> - stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1,
> - text=True)
> - defer(xdp.kill)
> + with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}',
> + wait_init=3):
>
> - stdout, stderr = xdp.communicate(timeout=10)
> - rx = tx = False
> + rx = tx = False
>
> - if xdp.returncode == 255:
> - raise KsftSkipEx('AF_XDP unsupported')
> - elif xdp.returncode > 0:
Removing this check causes a stack trace on my XDP-disabled kernel,
whereas with the existing code it caused a skip.
Maybe that's OK, though?
The issue is that xdp_helper.c fails and exits with return -1 before
the call to ksft_ready(), which results in the following:
# Exception| Traceback (most recent call last):
# Exception| File "/home/jdamato/code/net-next/tools/testing/selftests/net/lib/py/ksft.py", line 223, in ksft_run
# Exception| case(*args)
# Exception| File "/home/jdamato/code/net-next/./tools/testing/selftests/drivers/net/queues.py", line 27, in check_xsk
# Exception| with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}',
# Exception| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Exception| File "/home/jdamato/code/net-next/tools/testing/selftests/net/lib/py/utils.py", line 108, in __init__
# Exception| super().__init__(comm, background=True,
# Exception| File "/home/jdamato/code/net-next/tools/testing/selftests/net/lib/py/utils.py", line 63, in __init__
# Exception| raise Exception("Did not receive ready message")
# Exception| Exception: Did not receive ready message
not ok 4 queues.check_xsk
# Totals: pass:3 fail:1 xfail:0 xpass:0 skip:0 error:0
I had originally modified the test so that if XDP is disabled in the
kernel it would skip, but I think you mentioned in a previous thread
that this was a "non-goal", IIRC?
No strong opinion on my side as to what the behavior should be when
XDP is disabled, but wanted to mention this so that the behavior
change was known.
Separately: I retested this on a machine with XDP enabled, both with
and without NETIF set and the test seems to hang because the helper
is blocked on:
read(STDIN_FILENO, &byte, 1);
according to strace:
strace: Process 14198 attached
21:50:02 read(0,
So, I think this patch needs to be tweaked to write a byte to the
helper so it exits (I assume the defer was killing it before?) or
the helper needs to be modified in some way?
Powered by blists - more mailing lists