[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201216012515.560026-3-weiwan@google.com>
Date: Tue, 15 Dec 2020 17:25:14 -0800
From: Wei Wang <weiwan@...gle.com>
To: Jakub Kicinski <kuba@...nel.org>,
David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Cc: Paolo Abeni <pabeni@...hat.com>,
Hannes Frederic Sowa <hannes@...essinduktion.org>,
Felix Fietkau <nbd@....name>,
Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next v5 2/3] net: implement threaded-able napi poll loop support
This patch allows running each napi poll loop inside its own
kernel thread.
Threaded mode can be enabled through the napi_set_threaded()
API, and does not require a device up/down. The kthread is
created on demand when napi_set_threaded() is called, and is
eventually shut down in napi_disable().
Once threaded mode is enabled and the kthread is
started, napi_schedule() will wake up that thread instead
of scheduling the softirq.
The threaded poll loop behaves much like net_rx_action(),
but it does not have to manipulate local irqs and uses
an explicit scheduling point based on netdev_budget.
Co-developed-by: Paolo Abeni <pabeni@...hat.com>
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@...essinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@...essinduktion.org>
Co-developed-by: Jakub Kicinski <kuba@...nel.org>
Signed-off-by: Jakub Kicinski <kuba@...nel.org>
Signed-off-by: Wei Wang <weiwan@...gle.com>
---
include/linux/netdevice.h | 12 ++--
net/core/dev.c | 121 ++++++++++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+), 7 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7bf167993c05..2cd1e3975103 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -347,6 +347,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
+ struct task_struct *thread;
};
enum {
@@ -358,6 +359,7 @@ enum {
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
};
enum {
@@ -369,6 +371,7 @@ enum {
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};
enum gro_result {
@@ -511,13 +514,7 @@ void napi_disable(struct napi_struct *n);
* Resume NAPI from being scheduled on this context.
* Must be paired with napi_disable.
*/
-static inline void napi_enable(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
-}
+void napi_enable(struct napi_struct *n);
/**
* napi_synchronize - wait until NAPI is not running
@@ -2158,6 +2155,7 @@ struct net_device {
struct lock_class_key *qdisc_tx_busylock;
struct lock_class_key *qdisc_running_key;
bool proto_down;
+ bool threaded;
unsigned wol_enabled:1;
struct list_head net_notifier_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index adf74573f51c..47c33affaa80 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
@@ -1475,6 +1476,36 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
+static int napi_threaded_poll(void *data);
+
+static int napi_kthread_create(struct napi_struct *n)
+{
+ int err = 0;
+
+ /* Create and wake up the kthread once to put it in
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
+ * warning and work with loadavg.
+ */
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+ n->dev->name, n->napi_id);
+ if (IS_ERR(n->thread)) {
+ err = PTR_ERR(n->thread);
+ pr_err("kthread_run failed with err %d\n", err);
+ n->thread = NULL;
+ }
+
+ return err;
+}
+
+static void napi_kthread_stop(struct napi_struct *n)
+{
+ if (!n->thread)
+ return;
+ kthread_stop(n->thread);
+ clear_bit(NAPI_STATE_THREADED, &n->state);
+ n->thread = NULL;
+}
+
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -4234,6 +4265,11 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+ wake_up_process(napi->thread);
+ return;
+ }
+
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -6690,6 +6726,29 @@ static void init_gro_hash(struct napi_struct *napi)
napi->gro_bitmask = 0;
}
+static int napi_set_threaded(struct napi_struct *n, bool threaded)
+{
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ if (threaded == !!test_bit(NAPI_STATE_THREADED, &n->state))
+ return 0;
+ if (threaded) {
+ if (!n->thread) {
+ err = napi_kthread_create(n);
+ if (err)
+ goto out;
+ }
+ set_bit(NAPI_STATE_THREADED, &n->state);
+ } else {
+ clear_bit(NAPI_STATE_THREADED, &n->state);
+ }
+
+out:
+ return err;
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6731,12 +6790,29 @@ void napi_disable(struct napi_struct *n)
msleep(1);
hrtimer_cancel(&n->timer);
+ napi_kthread_stop(n);
clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
}
EXPORT_SYMBOL(napi_disable);
+void napi_enable(struct napi_struct *n)
+{
+ bool locked = rtnl_is_locked();
+
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ smp_mb__before_atomic();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
+ if (!locked)
+ rtnl_lock();
+ WARN_ON(napi_set_threaded(n, n->dev->threaded));
+ if (!locked)
+ rtnl_unlock();
+}
+EXPORT_SYMBOL(napi_enable);
+
static void flush_gro_hash(struct napi_struct *napi)
{
int i;
@@ -6859,6 +6935,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
return work;
}
+static int napi_thread_wait(struct napi_struct *napi)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ WARN_ON(!list_empty(&napi->poll_list));
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+ __set_current_state(TASK_RUNNING);
+ return -1;
+}
+
+static int napi_threaded_poll(void *data)
+{
+ struct napi_struct *napi = data;
+ void *have;
+
+ while (!napi_thread_wait(napi)) {
+ for (;;) {
+ bool repoll = false;
+
+ local_bh_disable();
+
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
+
+ __kfree_skb_flush();
+ local_bh_enable();
+
+ if (!repoll)
+ break;
+
+ cond_resched();
+ }
+ }
+ return 0;
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
--
2.29.2.684.gfbc64c5ab5-goog
Powered by blists - more mailing lists