Message-Id: <1630058273-2400-1-git-send-email-lirongqing@baidu.com>
Date:   Fri, 27 Aug 2021 17:57:53 +0800
From:   Li RongQing <lirongqing@...du.com>
To:     netdev@...r.kernel.org
Subject: [PATCH][RFC] net: optimise rps IPI sending

In a virtualization setup, sending an IPI causes a vmexit and is
expensive, so at the highest throughput IPIs should not be sent
one by one.

smp_call_function_many() may use a single PV IPI to send IPIs to
many CPUs at once, so batch the pending RPS IPIs into a per-CPU
cpumask and send them with one call. The batched path is off by
default and is enabled via the new sysctl net.core.rps_pv_send_ipi.

Signed-off-by: Li RongQing <lirongqing@...du.com>
---
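Not part of the patch, just for illustration: a minimal, self-contained
kernel-module sketch of the batching pattern, assuming a guest kernel
where smp_call_function_many() can be backed by a PV send-IPI hypercall
(e.g. KVM_FEATURE_PV_SEND_IPI). The module and demo_func() names are
hypothetical.

#include <linux/module.h>
#include <linux/smp.h>
#include <linux/cpumask.h>

static void demo_func(void *info)
{
	/* Runs in IPI (hardirq) context on every CPU in the mask. */
	pr_info("IPI handled on CPU %d\n", smp_processor_id());
}

static int __init batch_ipi_init(void)
{
	cpumask_var_t mask;
	int cpu;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	/* Collect all targets first instead of signalling each one. */
	for_each_online_cpu(cpu)
		cpumask_set_cpu(cpu, mask);

	/*
	 * One call covers the whole mask (the calling CPU is skipped).
	 * smp_call_function_many() must be called with preemption
	 * disabled and not from interrupt or bottom-half context.
	 */
	preempt_disable();
	smp_call_function_many(mask, demo_func, NULL, true);
	preempt_enable();

	free_cpumask_var(mask);
	return 0;
}

static void __exit batch_ipi_exit(void)
{
}

module_init(batch_ipi_init);
module_exit(batch_ipi_exit);
MODULE_LICENSE("GPL");
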
 include/linux/netdevice.h  |  2 +-
 net/core/dev.c             | 32 +++++++++++++++++++++++++-------
 net/core/sysctl_net_core.c |  9 +++++++++
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bd8d5b8e2de3..ccf9e3e7c33d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4137,7 +4137,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
 extern int		netdev_budget;
 extern unsigned int	netdev_budget_usecs;
-
+extern unsigned int rps_pv_send_ipi __read_mostly;
 /* Called by rtnetlink.c:rtnl_unlock() */
 void netdev_run_todo(void);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 88650791c360..e839de51b555 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -203,6 +203,8 @@ static unsigned int napi_gen_id = NR_CPUS;
 static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
 
 static DECLARE_RWSEM(devnet_rename_sem);
+unsigned int rps_pv_send_ipi __read_mostly;
+static DEFINE_PER_CPU(cpumask_var_t, rps_ipi_mask);
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -4529,9 +4531,9 @@ EXPORT_SYMBOL(rps_may_expire_flow);
 #endif /* CONFIG_RFS_ACCEL */
 
 /* Called from hardirq (IPI) context */
-static void rps_trigger_softirq(void *data)
+static void rps_trigger_softirq(void *data __maybe_unused)
 {
-	struct softnet_data *sd = data;
+	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 
 	____napi_schedule(sd, &sd->backlog);
 	sd->received_rps++;
@@ -6364,12 +6366,26 @@ EXPORT_SYMBOL(__skb_gro_checksum_complete);
 static void net_rps_send_ipi(struct softnet_data *remsd)
 {
 #ifdef CONFIG_RPS
-	while (remsd) {
-		struct softnet_data *next = remsd->rps_ipi_next;
+	if (!rps_pv_send_ipi) {
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				smp_call_function_single_async(remsd->cpu, &remsd->csd);
+			remsd = next;
+		}
+	} else {
+		struct cpumask *tmpmask = this_cpu_cpumask_var_ptr(rps_ipi_mask);
+
+		cpumask_clear(tmpmask);
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
 
-		if (cpu_online(remsd->cpu))
-			smp_call_function_single_async(remsd->cpu, &remsd->csd);
-		remsd = next;
+			if (cpu_online(remsd->cpu))
+				cpumask_set_cpu(remsd->cpu, tmpmask);
+			remsd = next;
+		}
+		smp_call_function_many(tmpmask, rps_trigger_softirq, NULL, false);
 	}
 #endif
 }
@@ -11627,6 +11643,8 @@ static int __init net_dev_init(void)
 #ifdef CONFIG_RPS
 		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
 		sd->cpu = i;
+		zalloc_cpumask_var_node(&per_cpu(rps_ipi_mask, i),
+			GFP_KERNEL, cpu_to_node(i));
 #endif
 
 		init_gro_hash(&sd->backlog);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..dc807841d7c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -377,6 +377,15 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_do_rss_key,
 	},
+	{
+		.procname	= "rps_pv_send_ipi",
+		.data		= &rps_pv_send_ipi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #ifdef CONFIG_BPF_JIT
 	{
 		.procname	= "bpf_jit_enable",
-- 
2.33.0.69.gc420321.dirty
