Message-ID: <20250825195947.4073595-4-edumazet@google.com>
Date: Mon, 25 Aug 2025 19:59:47 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, 
	Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com, 
	Willem de Bruijn <willemb@...gle.com>, Kuniyuki Iwashima <kuniyu@...gle.com>, 
	Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next 3/3] net: add new sk->sk_drops1 field

sk->sk_drops can be heavily contended when
updated from many cpus.

Instead of using a (too expensive) per-cpu data structure,
add a second sk->sk_drops1 field and make
sk_drops_inc() NUMA aware.

This patch adds 64 bytes per socket.

For hosts with more than two memory nodes, sk_drops_inc()
might not be optimal; it can be refined later.
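
For illustration only (not part of this patch), here is a minimal
userspace C model of how the two-counter split could be generalized
to N stripes; NR_STRIPES, drop_counter, drops_inc()/drops_read() and
the stub numa_node_id() are invented for this sketch:

	#include <stdatomic.h>
	#include <stdio.h>

	#define NR_STRIPES 4	/* e.g. one stripe per memory node */

	static atomic_long drop_counter[NR_STRIPES];	/* striped counters */

	/* Stand-in for the kernel's numa_node_id(); hardcoded for the demo. */
	static int numa_node_id(void) { return 1; }

	static void drops_inc(void)
	{
		/* CPUs only dirty the stripe matching their memory node. */
		atomic_fetch_add(&drop_counter[numa_node_id() % NR_STRIPES], 1);
	}

	static long drops_read(void)
	{
		long sum = 0;

		/* Readers (procfs, BPF iterators, ...) sum every stripe. */
		for (int i = 0; i < NR_STRIPES; i++)
			sum += atomic_load(&drop_counter[i]);
		return sum;
	}

	int main(void)
	{
		drops_inc();
		drops_inc();
		printf("drops=%ld\n", drops_read());	/* prints drops=2 */
		return 0;
	}

In the kernel, each cacheline-aligned stripe would cost another cache
line per socket, which is presumably why this patch stops at two
counters.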

Tested with the following stress test, sending about 11 Mpps
to a dual-socket AMD EPYC 7B13 64-Core host.

super_netperf 20 -t UDP_STREAM -H DUT -l10 -- -n -P,1000 -m 120
Note: due to socket lookup, only one UDP socket will receive
packets on the DUT.

Then measure the receiver (DUT) behavior. We can see that both
the consumer and the BH handlers process more packets per second.

Before:

nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 855592             0.0
Udp6InErrors                    5621467            0.0
Udp6RcvbufErrors                5621467            0.0

After:
nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 914537             0.0
Udp6InErrors                    6888487            0.0
Udp6RcvbufErrors                6888487            0.0

Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
 include/net/sock.h                            | 20 +++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_netlink.c    |  3 ++-
 .../selftests/bpf/progs/bpf_iter_udp4.c       |  2 +-
 .../selftests/bpf/progs/bpf_iter_udp6.c       |  2 +-
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index f40e3c4883be32c8282694ab215bcf79eb87cbd7..318169eb1a3d40eefac50147012551614abc6f7a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -282,6 +282,7 @@ struct sk_filter;
   *	@sk_err_soft: errors that don't cause failure but are the cause of a
   *		      persistent failure not just 'timed out'
   *	@sk_drops: raw/udp drops counter
+  *	@sk_drops1: second drops counter
   *	@sk_ack_backlog: current listen backlog
   *	@sk_max_ack_backlog: listen backlog set in listen()
   *	@sk_uid: user id of owner
@@ -571,6 +572,11 @@ struct sock {
 	atomic_t		sk_drops ____cacheline_aligned_in_smp;
 	struct rcu_head		sk_rcu;
 	netns_tracker		ns_tracker;
+#if defined(CONFIG_NUMA)
+	atomic_t		sk_drops1 ____cacheline_aligned_in_smp;
+#else
+	atomic_t		sk_drops1;
+#endif
 };
 
 struct sock_bh_locked {
@@ -2684,17 +2690,31 @@ struct sock_skb_cb {
 
 static inline void sk_drops_inc(struct sock *sk)
 {
+#if defined(CONFIG_NUMA)
+	int n = numa_node_id() % 2;
+
+	if (n)
+		atomic_inc(&sk->sk_drops1);
+	else
+		atomic_inc(&sk->sk_drops);
+#else
 	atomic_inc(&sk->sk_drops);
+#endif
 }
 
 static inline int sk_drops_read(const struct sock *sk)
 {
+#if defined(CONFIG_NUMA)
+	return atomic_read(&sk->sk_drops) + atomic_read(&sk->sk_drops1);
+#else
 	return atomic_read(&sk->sk_drops);
+#endif
 }
 
 static inline void sk_drops_reset(struct sock *sk)
 {
 	atomic_set(&sk->sk_drops, 0);
+	atomic_set(&sk->sk_drops1, 0);
 }
 
 static inline void
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 00b2ceae81fb0914f2de3634eb342004e8bc3c5b..31ad9fcc6022d5d31b9c6a35daacaad7c887a51f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -57,7 +57,8 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
 		inode = SOCK_INODE(sk);
 		bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
 	}
-	BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+	BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n",
+		       s->sk_drops.counter + s->sk_drops1.counter, ino);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index ffbd4b116d17ffbb9f14440c788e50490fb0f4e0..192ab5693a7131c1ec5879e539651c21f6f3c9ae 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -64,7 +64,7 @@ int dump_udp4(struct bpf_iter__udp *ctx)
 		       0, 0L, 0, ctx->uid, 0,
 		       sock_i_ino(&inet->sk),
 		       inet->sk.sk_refcnt.refs.counter, udp_sk,
-		       inet->sk.sk_drops.counter);
+		       inet->sk.sk_drops.counter + inet->sk.sk_drops1.counter);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 47ff7754f4fda4c9db92fbf1dc2e6a68f044174e..5170bdf458fa1b9a4eea9240fbaa5934182a7776 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx)
 		       0, 0L, 0, ctx->uid, 0,
 		       sock_i_ino(&inet->sk),
 		       inet->sk.sk_refcnt.refs.counter, udp_sk,
-		       inet->sk.sk_drops.counter);
+		       inet->sk.sk_drops.counter + inet->sk.sk_drops1.counter);
 
 	return 0;
 }
-- 
2.51.0.261.g7ce5a0a67e-goog

