Message-Id: <2ead6fd79411342e29710859db0f1f8520092f1f.1750285100.git.jbaron@akamai.com>
Date: Wed, 18 Jun 2025 19:13:23 -0400
From: Jason Baron <jbaron@...mai.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
        pabeni@...hat.com, horms@...nel.org, kuniyu@...zon.com
Subject: [PATCH net-next v2 3/3] netlink: Fix wraparound of sk->sk_rmem_alloc

For netlink sockets, when comparing the allocated rmem against the
rcvbuf limit, the comparison is done using signed values. This means
that if rcvbuf is set near INT_MAX, then sk->sk_rmem_alloc may become
negative in the comparison with rcvbuf, which will yield incorrect
results.
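
For illustration, a hypothetical userspace sketch of the signed-
comparison hazard (rmem stands in for sk->sk_rmem_alloc, which
atomic_read() returns as a signed int; names here are illustrative):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int rcvbuf = INT_MAX;             /* limit raised via SO_RCVBUFFORCE */
	unsigned int rmem = 0x90000000u;  /* accumulated skb truesize, > INT_MAX */

	/* Read back as a signed int, the wrapped counter is negative,
	 * so the over-limit check never triggers and queuing continues.
	 */
	if ((int)rmem > rcvbuf)
		printf("over limit (expected)\n");
	else
		printf("under limit (wrong: value wrapped negative)\n");
	return 0;
}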

This can be reproduced by using the program from SOCK_DIAG(7) with
some slight modifications: first, set sk->sk_rcvbuf to INT_MAX using
SO_RCVBUFFORCE, and then run "send_query()" in a loop while never
calling "receive_responses()". In this case, the value of
sk->sk_rmem_alloc will continuously wrap around and thus more memory
is allocated than the sk->sk_rcvbuf limit allows. This will
eventually consume all memory, leading to an out-of-memory condition
with skbs filling up the slab.
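
A minimal sketch of such a reproducer (an approximation, not the
exact modified SOCK_DIAG(7) program; here an inet_diag dump query
stands in for send_query(), error handling is omitted, and
SO_RCVBUFFORCE requires CAP_NET_ADMIN):

#include <limits.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	int max = INT_MAX;

	/* Raise the limit to INT_MAX so the signed compare can wrap. */
	setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &max, sizeof(max));

	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} q = {
		.nlh = {
			.nlmsg_len   = sizeof(q),
			.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family   = AF_INET,
			.sdiag_protocol = IPPROTO_TCP,
			.idiag_states   = -1,
		},
	};

	/* send_query() in a loop without receive_responses(): queued
	 * replies keep growing sk->sk_rmem_alloc past INT_MAX.
	 */
	for (;;)
		send(fd, &q, sizeof(q), 0);
}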

Let's fix this in a similar manner to:
commit 5a465a0da13e ("udp: Fix multiple wraparounds of sk->sk_rmem_alloc.")

As noted in that fix, if there are multiple threads writing to a
netlink socket it is possible to slightly exceed the rcvbuf value,
but doing the check this way avoids an expensive atomic_add_return()
in the common case. I've confirmed that with this fix in place the
modified program from SOCK_DIAG(7) can no longer exhaust memory and
the sk->sk_rcvbuf limit is enforced.
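
The helper sock_rcvbuf_has_space() used below is introduced earlier
in this series and is not shown in this patch. For reference, a
plausible sketch of the unsigned-compare pattern it follows, modeled
on the UDP fix cited above (an assumption about its shape, not the
series' actual code):

/* Compare queued memory plus the incoming skb against the limit
 * using unsigned arithmetic, so a counter past INT_MAX cannot
 * compare as negative. A plain atomic_read() (rather than
 * atomic_add_return()) keeps the common case cheap; concurrent
 * writers may overshoot rcvbuf slightly, as the UDP fix accepts.
 */
static inline bool sock_rcvbuf_has_space(const struct sock *sk,
					 const struct sk_buff *skb)
{
	unsigned int rmem = atomic_read(&sk->sk_rmem_alloc);
	unsigned int rcvbuf = READ_ONCE(sk->sk_rcvbuf);

	return rmem + skb->truesize <= rcvbuf;
}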

Signed-off-by: Jason Baron <jbaron@...mai.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
---
 net/netlink/af_netlink.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e8972a857e51..a770b90abe7d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1216,8 +1216,8 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 
 	nlk = nlk_sk(sk);
 
-	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
+	if (!sock_rcvbuf_has_space(sk, skb) ||
+	    test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!*timeo) {
 			if (!ssk || netlink_is_kernel(ssk))
@@ -1230,7 +1230,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		__set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&nlk->wait, &wait);
 
-		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+		if ((!sock_rcvbuf_has_space(sk, skb) ||
 		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
 		    !sock_flag(sk, SOCK_DEAD))
 			*timeo = schedule_timeout(*timeo);
@@ -1383,12 +1383,15 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int rmem, rcvbuf;
 
-	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+	if (sock_rcvbuf_has_space(sk, skb) &&
 	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 		netlink_skb_set_owner_r(skb, sk);
 		__netlink_sendskb(sk, skb);
-		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+		rmem = atomic_read(&sk->sk_rmem_alloc);
+		rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+		return rmem > (rcvbuf >> 1);
 	}
 	return -1;
 }
@@ -1895,6 +1898,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	size_t copied, max_recvmsg_len;
 	struct sk_buff *skb, *data_skb;
+	unsigned int rmem, rcvbuf;
 	int err, ret;
 
 	if (flags & MSG_OOB)
@@ -1960,12 +1964,15 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	skb_free_datagram(sk, skb);
 
-	if (READ_ONCE(nlk->cb_running) &&
-	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
-		ret = netlink_dump(sk, false);
-		if (ret) {
-			WRITE_ONCE(sk->sk_err, -ret);
-			sk_error_report(sk);
+	if (READ_ONCE(nlk->cb_running)) {
+		rmem = atomic_read(&sk->sk_rmem_alloc);
+		rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+		if (rmem <= (rcvbuf >> 1)) {
+			ret = netlink_dump(sk, false);
+			if (ret) {
+				WRITE_ONCE(sk->sk_err, -ret);
+				sk_error_report(sk);
+			}
 		}
 	}
 
@@ -2258,9 +2265,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 		goto errout_skb;
 	}
 
-	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		goto errout_skb;
-
 	/* NLMSG_GOODSIZE is small to avoid high order allocations being
 	 * required, but it makes sense to _attempt_ a 32KiB allocation
 	 * to reduce number of system calls on dump operations, if user
@@ -2283,6 +2287,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	if (!skb)
 		goto errout_skb;
 
+	if (!sock_rcvbuf_has_space(sk, skb))
+		goto errout_skb;
+
 	/* Trim skb to allocated size. User is expected to provide buffer as
 	 * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
 	 * netlink_recvmsg())). dump will pack as many smaller messages as
-- 
2.25.1

