[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250519224325.3117279-1-skhawaja@google.com>
Date: Mon, 19 May 2025 22:43:25 +0000
From: Samiullah Khawaja <skhawaja@...gle.com>
To: Jakub Kicinski <kuba@...nel.org>, "David S . Miller " <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Paolo Abeni <pabeni@...hat.com>, almasrymina@...gle.com,
willemb@...gle.com, jdamato@...tly.com, mkarsten@...terloo.ca,
weiwan@...gle.com
Cc: netdev@...r.kernel.org, skhawaja@...gle.com
Subject: [PATCH net-next v2] net: stop napi kthreads when THREADED napi is disabled
Once the THREADED napi is disabled, the napi kthread should also be
stopped. Keeping the kthread intact after disabling THREADED napi makes
the PID of this kthread show up in the output of netlink 'napi-get' and
ps -ef output.
The is discussed in the patch below:
https://lore.kernel.org/all/20250502191548.559cc416@kernel.org
The napi THREADED state should be unset before stopping the thread. This
makes sure that this napi will not be scheduled again for threaded
polling (SCHED_THREADED). In the napi_thread_wait function we need to
make sure that the SCHED_THREADED is not set before stopping. Once the
SCHED_THREADED is unset it means that the napi kthread doesn't own this
napi and it is safe to stop it.
____napi_schedule for threaded napi is also modified to make sure that
the STATE_THREADED is not unset while we are setting SCHED_THREADED and
waking up the napi kthread. If STATE_THREADED is unset while the
threaded napi is being scheduled, the schedule code will recheck the
STATE_THREADED to prevent wakeup of a stopped thread. Use try_cmpxchg to
make sure the value of napi->state is not changed.
Add a new test in nl_netdev to verify this behaviour.
Tested:
./tools/testing/selftests/net/nl_netdev.py
TAP version 13
1..6
ok 1 nl_netdev.empty_check
ok 2 nl_netdev.lo_check
ok 3 nl_netdev.page_pool_check
ok 4 nl_netdev.napi_list_check
ok 5 nl_netdev.dev_set_threaded
ok 6 nl_netdev.nsim_rxq_reset_down
# Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Samiullah Khawaja <skhawaja@...gle.com>
---
v2:
- Handle the race where the STATE_THREADED is disabled and kthread is
stopped while the ____napi_schedule code has already checked the
STATE_THREADED and tries to wakeup napi kthread that is stopped.
net/core/dev.c | 77 +++++++++++++++++++-----
tools/testing/selftests/net/nl_netdev.py | 38 +++++++++++-
2 files changed, 98 insertions(+), 17 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index d0563ddff6ca..52d173f5206c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4754,15 +4754,18 @@ int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
-/* Called with irq disabled */
-static inline void ____napi_schedule(struct softnet_data *sd,
- struct napi_struct *napi)
+static inline bool ____try_napi_schedule_threaded(struct softnet_data *sd,
+ struct napi_struct *napi)
{
struct task_struct *thread;
+ unsigned long new, val;
- lockdep_assert_irqs_disabled();
+ do {
+ val = READ_ONCE(napi->state);
+
+ if (!(val & NAPIF_STATE_THREADED))
+ return false;
- if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
* napi_enable()/dev_set_threaded().
* Use READ_ONCE() to guarantee a complete
@@ -4770,17 +4773,30 @@ static inline void ____napi_schedule(struct softnet_data *sd,
* wake_up_process() when it's not NULL.
*/
thread = READ_ONCE(napi->thread);
- if (thread) {
- if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
- goto use_local_napi;
+ if (!thread)
+ return false;
- set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
- wake_up_process(thread);
- return;
- }
- }
+ if (use_backlog_threads() &&
+ thread == raw_cpu_read(backlog_napi))
+ return false;
+
+ new = val | NAPIF_STATE_SCHED_THREADED;
+ } while (!try_cmpxchg(&napi->state, &val, new));
+
+ wake_up_process(thread);
+ return true;
+}
+
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+ struct napi_struct *napi)
+{
+ lockdep_assert_irqs_disabled();
+
+ /* try to schedule threaded napi if enabled */
+ if (____try_napi_schedule_threaded(sd, napi))
+ return;
-use_local_napi:
list_add_tail(&napi->poll_list, &sd->poll_list);
WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
@@ -6888,6 +6904,18 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
+static void dev_stop_napi_threads(struct net_device *dev)
+{
+ struct napi_struct *napi;
+
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (napi->thread) {
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+ }
+ }
+}
+
int dev_set_threaded(struct net_device *dev, bool threaded)
{
struct napi_struct *napi;
@@ -6926,6 +6954,15 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
list_for_each_entry(napi, &dev->napi_list, dev_list)
assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ /* Calling kthread_stop on napi threads should be safe now as the
+ * threaded state is disabled.
+ */
+ if (!threaded) {
+ /* Make sure the state is set before stopping threads.*/
+ smp_mb__before_atomic();
+ dev_stop_napi_threads(dev);
+ }
+
return err;
}
EXPORT_SYMBOL(dev_set_threaded);
@@ -7451,7 +7488,8 @@ static int napi_thread_wait(struct napi_struct *napi)
{
set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop()) {
+ /* Wait until we are scheduled or asked to stop. */
+ while (true) {
/* Testing SCHED_THREADED bit here to make sure the current
* kthread owns this napi and could poll on this napi.
* Testing SCHED bit is not enough because SCHED bit might be
@@ -7463,6 +7501,15 @@ static int napi_thread_wait(struct napi_struct *napi)
return 0;
}
+ /* Since the SCHED_THREADED is not set so this napi kthread does
+ * not own this napi and it is safe to stop here. Checking the
+ * SCHED_THREADED before stopping here makes sure that this napi
+ * was not scheduled again while napi threaded was being
+ * disabled.
+ */
+ if (kthread_should_stop())
+ break;
+
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index beaee5e4e2aa..c9109627a741 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -34,6 +35,39 @@ def napi_list_check(nf) -> None:
ksft_eq(len(napis), 100,
comment=f"queue count after reset queue {q} mode {i}")
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1.get('pid'), None)
def nsim_rxq_reset_down(nf) -> None:
"""
@@ -122,7 +156,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
- nsim_rxq_reset_down],
+ dev_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
base-commit: b65999e7238e6f2a48dc77c8c2109c48318ff41b
--
2.49.0.1101.gccaa498523-goog
Powered by blists - more mailing lists