lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250515100354.3339920-1-edumazet@google.com>
Date: Thu, 15 May 2025 10:03:54 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, 
	Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com, 
	Eric Dumazet <edumazet@...gle.com>, Octavian Purdila <tavip@...gle.com>
Subject: [PATCH net-next] net: rfs: add sock_rps_delete_flow() helper

RFS can exhibit lower performance for workloads using short-lived
flows and a small set of 4-tuples.

This is often the case for load-testers, using a pair of hosts,
if the server has a single listener port.

Typical use case :

Server : tcp_crr -T128 -F1000 -6 -U -l30 -R 14250
Client : tcp_crr -T128 -F1000 -6 -U -l30 -c -H server | grep local_throughput

This is because the RFS global hash table contains stale information
when the same RSS key is recycled for another socket and another cpu.

Make sure to undo the changes and go back to the initial state when
a flow is disconnected.

Performance of the above test is increased by 22 %,
going from 372604 transactions per second to 457773.

Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Reported-by: Octavian Purdila <tavip@...gle.com>
---
 include/net/rps.h          | 24 ++++++++++++++++++++++++
 net/ipv4/inet_hashtables.c |  6 ++++--
 net/ipv4/udp.c             |  2 ++
 net/sctp/socket.c          |  2 +-
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/include/net/rps.h b/include/net/rps.h
index 507f4aa5d39b296e65668969a13e5732738bf531..d8ab3a08bcc4882e2ad9c84c22ef26b254c14680 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -123,6 +123,30 @@ static inline void sock_rps_record_flow(const struct sock *sk)
 #endif
 }
 
+static inline void sock_rps_delete_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *table;
+	u32 hash, index;
+
+	if (!static_branch_unlikely(&rfs_needed))
+		return;
+
+	hash = READ_ONCE(sk->sk_rxhash);
+	if (!hash)
+		return;
+
+	rcu_read_lock();
+	table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+	if (table) {
+		index = hash & table->mask;
+		if (READ_ONCE(table->ents[index]) != RPS_NO_CPU)
+			WRITE_ONCE(table->ents[index], RPS_NO_CPU);
+	}
+	rcu_read_unlock();
+#endif
+}
+
 static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index da85cc30e3824ad2b0cd115854521657928eca07..77a0b52b2eabfc6b08c34acea9fda092b88a32b5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -23,11 +23,12 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/inet6_hashtables.h>
 #endif
-#include <net/secure_seq.h>
 #include <net/hotdata.h>
 #include <net/ip.h>
-#include <net/tcp.h>
+#include <net/rps.h>
+#include <net/secure_seq.h>
 #include <net/sock_reuseport.h>
+#include <net/tcp.h>
 
 u32 inet_ehashfn(const struct net *net, const __be32 laddr,
 		 const __u16 lport, const __be32 faddr,
@@ -790,6 +791,7 @@ void inet_unhash(struct sock *sk)
 	if (sk_unhashed(sk))
 		return;
 
+	sock_rps_delete_flow(sk);
 	if (sk->sk_state == TCP_LISTEN) {
 		struct inet_listen_hashbucket *ilb2;
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 358b49caa7b974ddef70da8482f8a35fdb003fa9..dde52b8050b8ca251ae13f20853c6c9512453dd0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -120,6 +120,7 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6_stubs.h>
 #endif
+#include <net/rps.h>
 
 struct udp_table udp_table __read_mostly;
 
@@ -2200,6 +2201,7 @@ void udp_lib_unhash(struct sock *sk)
 		struct udp_table *udptable = udp_get_table_prot(sk);
 		struct udp_hslot *hslot, *hslot2;
 
+		sock_rps_delete_flow(sk);
 		hslot  = udp_hashslot(udptable, sock_net(sk),
 				      udp_sk(sk)->udp_port_hash);
 		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 53725ee7ba06d780e220c3a184b4f611a7cb5e51..85a9dfeff4d6a5508ce77720b34180bc971ce396 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8321,7 +8321,7 @@ static int sctp_hash(struct sock *sk)
 
 static void sctp_unhash(struct sock *sk)
 {
-	/* STUB */
+	sock_rps_delete_flow(sk);
 }
 
 /* Check if port is acceptable.  Possibly find first available port.
-- 
2.49.0.1101.gccaa498523-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ