[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210223234130.437831-1-weiwan@google.com>
Date: Tue, 23 Feb 2021 15:41:30 -0800
From: Wei Wang <weiwan@...gle.com>
To: "David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org
Cc: Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>,
Hannes Frederic Sowa <hannes@...essinduktion.org>,
Alexander Duyck <alexanderduyck@...com>,
Martin Zaharinov <micron10@...il.com>
Subject: [PATCH net] net: fix race between napi kthread mode and busy poll
Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
determine if the kthread owns this napi and could call napi->poll() on
it. However, if socket busy poll is enabled, it is possible that the
busy poll thread grabs this SCHED bit (after the previous napi->poll()
invokes napi_complete_done() and clears SCHED bit) and tries to poll
on the same napi.
This patch tries to fix this race by adding a new bit
NAPI_STATE_SCHED_BUSY_POLL in napi->state. This bit gets set in
napi_busy_loop() together with NAPI_STATE_SCHED, and gets cleared in
napi_complete_done() together with NAPI_STATE_SCHED. This helps
distinguish the ownership of the napi between kthread and the busy poll
thread, and prevents the kthread from polling on the napi when this napi
is still owned by the busy poll thread.
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
Reported-by: Martin Zaharinov <micron10@...il.com>
Suggested-by: Alexander Duyck <alexanderduyck@...com>
Reviewed-by: Alexander Duyck <alexanderduyck@...com>
Reviewed-by: Eric Dumazet <edumazet@...gle.com>
Signed-off-by: Wei Wang <weiwan@...gle.com>
---
include/linux/netdevice.h | 4 +++-
net/core/dev.c | 10 ++++++++--
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddf4cfc12615..9ed0f89ccdd5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -357,9 +357,10 @@ enum {
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
- NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
+ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() grabs SCHED bit and could busy poll */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
+ NAPI_STATE_SCHED_BUSY_POLL, /* Napi is currently scheduled in busy poll mode */
};
enum {
@@ -372,6 +373,7 @@ enum {
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
+ NAPIF_STATE_SCHED_BUSY_POLL = BIT(NAPI_STATE_SCHED_BUSY_POLL),
};
enum gro_result {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c5967e80132..ec1a30d95d8b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6486,6 +6486,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+ NAPIF_STATE_SCHED_BUSY_POLL |
NAPIF_STATE_PREFER_BUSY_POLL);
/* If STATE_MISSED was set, leave STATE_SCHED set,
@@ -6525,6 +6526,7 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
{
+ clear_bit(NAPI_STATE_SCHED_BUSY_POLL, &napi->state);
if (!skip_schedule) {
gro_normal_list(napi);
__napi_schedule(napi);
@@ -6624,7 +6626,8 @@ void napi_busy_loop(unsigned int napi_id,
}
if (cmpxchg(&napi->state, val,
val | NAPIF_STATE_IN_BUSY_POLL |
- NAPIF_STATE_SCHED) != val) {
+ NAPIF_STATE_SCHED |
+ NAPIF_STATE_SCHED_BUSY_POLL) != val) {
if (prefer_busy_poll)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
@@ -6971,7 +6974,10 @@ static int napi_thread_wait(struct napi_struct *napi)
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && !napi_disable_pending(napi)) {
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ unsigned long val = READ_ONCE(napi->state);
+
+ if (val & NAPIF_STATE_SCHED &&
+ !(val & NAPIF_STATE_SCHED_BUSY_POLL)) {
WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING);
return 0;
--
2.30.0.617.g56c4b15f3c-goog
Powered by blists - more mailing lists