lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250826125031.1578842-4-edumazet@google.com>
Date: Tue, 26 Aug 2025 12:50:29 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, 
	Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com, 
	Willem de Bruijn <willemb@...gle.com>, Kuniyuki Iwashima <kuniyu@...gle.com>, 
	Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH v2 net-next 3/5] net: add sk->sk_drop_counters

Some sockets suffer from heavy false sharing on sk->sk_drops
and on other fields in the same cache line.

Add sk->sk_drop_counters to:

- Move the drop counter(s) to dedicated cache lines.
- Add basic NUMA awareness to these drop counter(s).

Following patches will use this infrastructure for UDP and RAW sockets.

sk_clone_lock() is not yet ready: it would need to properly
set newsk->sk_drop_counters if we plan to use this for TCP sockets.

v2: used Paolo's suggestion from https://lore.kernel.org/netdev/8f09830a-d83d-43c9-b36b-88ba0a23e9b2@redhat.com/

Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
 include/net/sock.h | 32 +++++++++++++++++++++++++++++++-
 net/core/sock.c    |  2 ++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 9edb42ff06224cb8a1dd4f84af25bc22d1803ca9..73cd3316e288bde912dd96637e52d226575c2ffd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -102,6 +102,11 @@ struct net;
 typedef __u32 __bitwise __portpair;
 typedef __u64 __bitwise __addrpair;
 
+struct socket_drop_counters {
+	atomic_t	drops0 ____cacheline_aligned_in_smp;
+	atomic_t	drops1 ____cacheline_aligned_in_smp;
+};
+
 /**
  *	struct sock_common - minimal network layer representation of sockets
  *	@skc_daddr: Foreign IPv4 addr
@@ -282,6 +287,7 @@ struct sk_filter;
   *	@sk_err_soft: errors that don't cause failure but are the cause of a
   *		      persistent failure not just 'timed out'
   *	@sk_drops: raw/udp drops counter
+  *	@sk_drop_counters: optional pointer to socket_drop_counters
   *	@sk_ack_backlog: current listen backlog
   *	@sk_max_ack_backlog: listen backlog set in listen()
   *	@sk_uid: user id of owner
@@ -449,6 +455,7 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
+	struct socket_drop_counters *sk_drop_counters;
 	__cacheline_group_end(sock_read_rxtx);
 
 	__cacheline_group_begin(sock_write_rxtx);
@@ -2684,7 +2691,18 @@ struct sock_skb_cb {
 
 static inline void sk_drops_add(struct sock *sk, int segs)
 {
-	atomic_add(segs, &sk->sk_drops);
+	struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+	if (sdc) {
+		int n = numa_node_id() % 2;
+
+		if (n)
+			atomic_add(segs, &sdc->drops1);
+		else
+			atomic_add(segs, &sdc->drops0);
+	} else {
+		atomic_add(segs, &sk->sk_drops);
+	}
 }
 
 static inline void sk_drops_inc(struct sock *sk)
@@ -2694,11 +2712,23 @@ static inline void sk_drops_inc(struct sock *sk)
 
 static inline int sk_drops_read(const struct sock *sk)
 {
+	const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+	if (sdc) {
+		DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
+		return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+	}
 	return atomic_read(&sk->sk_drops);
 }
 
 static inline void sk_drops_reset(struct sock *sk)
 {
+	struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+	if (sdc) {
+		atomic_set(&sdc->drops0, 0);
+		atomic_set(&sdc->drops1, 0);
+	}
 	atomic_set(&sk->sk_drops, 0);
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 75368823969a7992a55a6f40d87ffb8886de2f39..e66ad1ec3a2d969b71835a492806563519459749 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2505,6 +2505,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 	newsk->sk_wmem_queued	= 0;
 	newsk->sk_forward_alloc = 0;
 	newsk->sk_reserved_mem  = 0;
+	DEBUG_NET_WARN_ON_ONCE(newsk->sk_drop_counters);
 	sk_drops_reset(newsk);
 	newsk->sk_send_head	= NULL;
 	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
@@ -4457,6 +4458,7 @@ static int __init sock_struct_check(void)
 #ifdef CONFIG_MEMCG
 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
 #endif
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_drop_counters);
 
 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
-- 
2.51.0.261.g7ce5a0a67e-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ