[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250623183006.7c1c0cfc@kernel.org>
Date: Mon, 23 Jun 2025 18:30:06 -0700
From: Jakub Kicinski <kuba@...nel.org>
To: Breno Leitao <leitao@...ian.org>
Cc: Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Paolo Abeni
<pabeni@...hat.com>, Shuah Khan <shuah@...nel.org>,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
linux-kselftest@...r.kernel.org, Willem de Bruijn
<willemdebruijn.kernel@...il.com>, gustavold@...il.com
Subject: Re: [PATCH net-next] selftests: net: add netpoll basic
functionality test
On Fri, 20 Jun 2025 02:38:31 -0700 Breno Leitao wrote:
> Add a basic selftest for the netpoll polling mechanism, specifically
> targeting the netpoll poll() side.
>
> The test creates a scenario where network transmission is running at
> maximum speed, and netpoll needs to poll the NIC. This is achieved by:
>
> 1. Configuring a single RX/TX queue to create contention
> 2. Generating background traffic to saturate the interface
> 3. Sending netconsole messages to trigger netpoll polling
> 4. Using dynamic netconsole targets via configfs
> 5. Delete and create new netconsole targets after 5 iterations
>
> The test validates a critical netpoll code path by monitoring traffic
> flow and ensuring netpoll_poll_dev() is called when the normal TX path
> is blocked. Perf probing confirms this test successfully triggers
> netpoll_poll_dev() in typical test runs.
>
> This addresses a gap in netpoll test coverage for a path that is
> tricky for the network stack.
> diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
> new file mode 100755
> index 0000000000000..2a81926169262
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
> @@ -0,0 +1,231 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0
> +
> +# This test aims to evaluate the netpoll polling mechanism (as in
> +# netpoll_poll_dev()). It presents a complex scenario where the network
> +# attempts to send a packet but fails, prompting it to poll the NIC from within
> +# the netpoll TX side.
> +#
> +# This has been a crucial path in netpoll that was previously untested. Jakub
> +# suggested using a single RX/TX queue, pushing traffic to the NIC, and then
> +# sending netpoll messages (via netconsole) to trigger the poll. `perf` probing
> +# of netpoll_poll_dev() showed that this test indeed triggers
> +# netpoll_poll_dev() once or twice in 10 iterations.
Could you turn this into a docstring?
tools/testing/selftests/drivers/net/netpoll_basic.py:1:0: C0114: Missing module docstring (missing-module-docstring)
Some more pylint warnings:
+tools/testing/selftests/drivers/net/netpoll_basic.py:200:0: C0301: Line too long (111/100) (line-too-long)
+tools/testing/selftests/drivers/net/netpoll_basic.py:1:0: C0114: Missing module docstring (missing-module-docstring)
+tools/testing/selftests/drivers/net/netpoll_basic.py:61:8: W0707: Consider explicitly re-raising using 'raise KsftSkipEx(f'Failed to configure RX/TX queues: {e}. Ethtool not available?') from e' (raise-missing-from)
+tools/testing/selftests/drivers/net/netpoll_basic.py:77:12: W0707: Consider explicitly re-raising using 'raise KsftFailEx(f'Failed to create netconsole target directory: {e}') from e' (raise-missing-from)
+tools/testing/selftests/drivers/net/netpoll_basic.py:106:8: W0707: Consider explicitly re-raising using 'raise KsftFailEx(f'Failed to configure netconsole target: {e}') from e' (raise-missing-from)
+tools/testing/selftests/drivers/net/netpoll_basic.py:135:8: W0707: Consider explicitly re-raising using 'raise KsftFailEx(f'Failed to delete netconsole target: {e}') from e' (raise-missing-from)
+tools/testing/selftests/drivers/net/netpoll_basic.py:208:11: W0718: Catching too general exception Exception (broad-exception-caught)
> +# Author: Breno Leitao <leitao@...ian.org>
> +
> +import errno
> +import os
> +import random
> +import string
> +import time
> +
> +from lib.py import (
> + ethtool,
> + GenerateTraffic,
> + ksft_exit,
> + ksft_pr,
> + ksft_run,
> + KsftFailEx,
> + KsftSkipEx,
> + NetdevFamily,
> + NetDrvEpEnv,
> +)
> +
> +NETCONSOLE_CONFIGFS_PATH = "/sys/kernel/config/netconsole"
> +REMOTE_PORT = 6666
> +LOCAL_PORT = 1514
> +# Number of netcons messages to send. I usually see netpoll_poll_dev()
> +# being called at least once in 10 iterations. Having 20 to have some buffers
> +ITERATIONS = 20
> +DEBUG = False
> +
> +
> +def generate_random_netcons_name() -> str:
> + """Generate a random target name starting with 'netcons'"""
> + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
> + return f"netcons_{random_suffix}"
> +
> +
> +def get_stats(cfg: NetDrvEpEnv, netdevnl: NetdevFamily) -> dict[str, int]:
> + """Get the statistics for the interface"""
> + return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
> +
> +
> +def set_single_rx_tx_queue(interface_name: str) -> None:
> + """Set the number of RX and TX queues to 1 using ethtool"""
> + try:
> + # This don't need to be reverted, since interfaces will be deleted after test
> + ethtool(f"-G {interface_name} rx 1 tx 1")
Would be nice to be able to run this test on real HW too.
Can you add appropriate defer() calls to undo the configuration changes?
> + except Exception as e:
> + raise KsftSkipEx(
> + f"Failed to configure RX/TX queues: {e}. Ethtool not available?"
> + )
> +
> +
> +def create_netconsole_target(
> + config_data: dict[str, str],
> + target_name: str,
> +) -> None:
> + """Create a netconsole dynamic target against the interfaces"""
> + ksft_pr(f"Using netconsole name: {target_name}")
> + try:
> + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
> + ksft_pr(f"Created target directory: {NETCONSOLE_CONFIGFS_PATH}/{target_name}")
> + except OSError as e:
> + if e.errno != errno.EEXIST:
> + raise KsftFailEx(f"Failed to create netconsole target directory: {e}")
> +
> + try:
> + for key, value in config_data.items():
> + if DEBUG:
> + ksft_pr(f"Setting {key} to {value}")
> + with open(
> + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}",
Could be personal preference but I think that using temp variable to
store the argument looks better than breaking out the function call
over 5 lines..
FWIW we don't care about Python black upstream.
> + "w",
> + encoding="utf-8",
> + ) as f:
> + # Always convert to string to write to file
> + f.write(str(value))
> + f.close()
> +
> + if DEBUG:
> + # Read all configuration values for debugging
> + for debug_key in config_data.keys():
> + with open(
> + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
> + "r",
> + encoding="utf-8",
> + ) as f:
> + content = f.read()
> + ksft_pr(
> + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key} {content}"
> + )
> +
> + except Exception as e:
> + raise KsftFailEx(f"Failed to configure netconsole target: {e}")
> +
> +
> +def set_netconsole(cfg: NetDrvEpEnv, interface_name: str, target_name: str) -> None:
> + """Configure netconsole on the interface with the given target name"""
> + config_data = {
> + "extended": "1",
> + "dev_name": interface_name,
> + "local_port": LOCAL_PORT,
> + "remote_port": REMOTE_PORT,
> + "local_ip": cfg.addr_v["4"] if cfg.addr_ipver == "4" else cfg.addr_v["6"],
> + "remote_ip": (
> + cfg.remote_addr_v["4"] if cfg.addr_ipver == "4" else cfg.remote_addr_v["6"]
> + ),
> + "remote_mac": "00:00:00:00:00:00", # Not important for this test
> + "enabled": "1",
> + }
> +
> + create_netconsole_target(config_data, target_name)
> + ksft_pr(f"Created netconsole target: {target_name} on interface {interface_name}")
> +
> +
> +def delete_netconsole_target(name: str) -> None:
> + """Delete a netconsole dynamic target"""
> + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
> + try:
> + if os.path.exists(target_path):
> + os.rmdir(target_path)
> + except OSError as e:
> + raise KsftFailEx(f"Failed to delete netconsole target: {e}")
> +
> +
> +def check_traffic_flowing(cfg: NetDrvEpEnv, netdevnl: NetdevFamily) -> int:
> + """Check if traffic is flowing on the interface"""
> + stat1 = get_stats(cfg, netdevnl)
> + time.sleep(1)
> + stat2 = get_stats(cfg, netdevnl)
> + pkts_per_sec = stat2["rx-packets"] - stat1["rx-packets"]
> + # Just make sure this will not fail even in slow/debug kernels
> + if pkts_per_sec < 10:
> + raise KsftFailEx(f"Traffic seems low: {pkts_per_sec}")
> + if DEBUG:
> + ksft_pr(f"Traffic per second {pkts_per_sec}")
> +
> + return pkts_per_sec
> +
> +
> +def do_netpoll_flush(
> + cfg: NetDrvEpEnv, netdevnl: NetdevFamily, ifname: str, target_name: str
> +) -> None:
> + """Print messages to the console, trying to trigger a netpoll poll"""
> +
> + set_netconsole(cfg, ifname, target_name)
> + for i in range(int(ITERATIONS)):
> + msg = f"netcons test #{i}."
> +
> + if DEBUG:
> + pkts_per_s = check_traffic_flowing(cfg, netdevnl)
> + msg += f" ({pkts_per_s} packets/s)"
> +
> + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
> + kmsg.write(msg)
> +
> + if not i % 5:
> + # Every 5 iterations, toggle netconsole
> + delete_netconsole_target(target_name)
> + set_netconsole(cfg, ifname, target_name)
> +
> +
> +def test_netpoll(cfg: NetDrvEpEnv, netdevnl: NetdevFamily) -> None:
> + """
> + Test netpoll by sending traffic to the interface and then sending
> + netconsole messages to trigger a poll
> + """
> +
> + target_name = generate_random_netcons_name()
> + ifname = cfg.dev["ifname"]
> + traffic = None
> +
> + try:
> + set_single_rx_tx_queue(ifname)
> + traffic = GenerateTraffic(cfg)
> + check_traffic_flowing(cfg, netdevnl)
Any reason to perform this check? GenerateTraffic() already waits for
traffic to ramp up. Do we need to adjust the logic there, or make some
methods public?
> + do_netpoll_flush(cfg, netdevnl, ifname, target_name)
> + finally:
> + if traffic:
> + traffic.stop()
> + delete_netconsole_target(target_name)
> +
> +
> +def check_dependencies() -> None:
> + """Check if the dependencies are met"""
> + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
> + raise KsftSkipEx(
> + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."
> + )
> +
> +
> +def load_netconsole_module() -> None:
> + """Try to load the netconsole module"""
> + try:
> + os.system("modprobe netconsole")
> + except Exception:
> + # It is fine if we fail to load the module, it will fail later
> + # at check_dependencies()
> + pass
> +
> +
> +def main() -> None:
> + """Main function to run the test"""
> + load_netconsole_module()
> + check_dependencies()
> + netdevnl = NetdevFamily()
> + with NetDrvEpEnv(__file__, nsim_test=True) as cfg:
> + ksft_run(
> + [test_netpoll],
> + args=(
> + cfg,
> + netdevnl,
> + ),
> + )
> + ksft_exit()
> +
> +
> +if __name__ == "__main__":
> + main()
>
> ---
> base-commit: 4f4040ea5d3e4bebebbef9379f88085c8b99221c
> change-id: 20250612-netpoll_test-a1324d2057c8
>
> Best regards,
> --
> Breno Leitao <leitao@...ian.org>
>
Powered by blists - more mailing lists