Message-Id: <20250609161244.3591029-1-jbaron@akamai.com>
Date: Mon, 9 Jun 2025 12:12:44 -0400
From: Jason Baron <jbaron@...mai.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
pabeni@...hat.com, horms@...nel.org, kuniyu@...zon.com
Subject: [PATCH net-next] netlink: Fix wraparounds of sk->sk_rmem_alloc

For netlink sockets, the allocated receive memory (sk->sk_rmem_alloc)
is compared against the rcvbuf limit using signed values. This means
that if rcvbuf is near INT_MAX, sk->sk_rmem_alloc may wrap around to a
negative value in the comparison with rcvbuf, which yields incorrect
results.
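
To illustrate with a userspace sketch (not the kernel code, just the
same arithmetic): once the charged memory wraps past INT_MAX, reading
it back as a signed int means the "over rcvbuf?" check never fires:

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		int rcvbuf = INT_MAX;		/* limit set via SO_RCVBUFFORCE */
		unsigned int charged = INT_MAX;	/* what atomic_add() has accumulated */

		/* charge one more skb->truesize worth; wraps past INT_MAX */
		charged += 65536;

		/* the old check reads the counter back as a signed value */
		if ((int)charged > rcvbuf)
			puts("over limit, skb would be dropped");
		else
			puts("appears under limit, skb is charged anyway");
		return 0;
	}
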
This can be reproduced with the program from SOCK_DIAG(7) after some
slight modifications: first set sk->sk_rcvbuf to INT_MAX using
SO_RCVBUFFORCE, then call "send_query()" in a loop while never calling
"receive_responses()". In this case, sk->sk_rmem_alloc continuously
wraps around, so more memory is allocated than the sk->sk_rcvbuf limit
allows. This eventually fills all of memory, leading to an
out-of-memory condition with skbs filling up the slab.
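
Roughly, the modification looks like this (a fragment meant for the
SOCK_DIAG(7) example program; "fd", "send_query()" and
"receive_responses()" are the netlink socket and helpers from that
example):

	int max = INT_MAX;

	/* needs CAP_NET_ADMIN; sk->sk_rcvbuf ends up near INT_MAX */
	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &max, sizeof(max)) == -1)
		perror("setsockopt");

	for (;;)
		send_query(fd);	/* receive_responses(fd) is intentionally never called */
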
Let's fix this in a similar manner to:
5a465a0da13e ("udp: Fix multiple wraparounds of sk->sk_rmem_alloc.")

As noted in that fix, if there are multiple threads writing to a
netlink socket it's possible to slightly exceed the rcvbuf value, but
doing it this way avoids an expensive atomic_add_return() in the
common case. I've confirmed that with the fix applied, the modified
program from SOCK_DIAG(7) can no longer fill memory and the
sk->sk_rcvbuf limit is enforced.
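
The resulting pattern at each enqueue site is (simplified from the
hunks below; "drop_or_wait" is just a placeholder for the existing
error/congestion handling at each call site):

	rmem = atomic_read(&sk->sk_rmem_alloc);
	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
	if (rmem + skb->truesize > rcvbuf)
		goto drop_or_wait;		/* treat as over the limit */
	/* netlink_skb_set_owner_r() then charges skb->truesize with a
	 * plain atomic_add(), not atomic_add_return().
	 */
	netlink_skb_set_owner_r(skb, sk);
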
Signed-off-by: Jason Baron <jbaron@...mai.com>
---
 net/netlink/af_netlink.c | 47 ++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e8972a857e51..607e5d72de39 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1213,11 +1213,15 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		      long *timeo, struct sock *ssk)
 {
 	struct netlink_sock *nlk;
+	unsigned int rmem, rcvbuf, size;
 
 	nlk = nlk_sk(sk);
+	rmem = atomic_read(&sk->sk_rmem_alloc);
+	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+	size = skb->truesize;
 
-	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
+	if (((rmem + size) > rcvbuf) ||
+	    test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!*timeo) {
 			if (!ssk || netlink_is_kernel(ssk))
@@ -1230,7 +1234,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		__set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&nlk->wait, &wait);
 
-		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+		rmem = atomic_read(&sk->sk_rmem_alloc);
+		rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+		if ((((rmem + size) > rcvbuf) ||
 		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
 		    !sock_flag(sk, SOCK_DEAD))
 			*timeo = schedule_timeout(*timeo);
@@ -1383,12 +1389,18 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int rmem, rcvbuf;
 
-	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+	rmem = atomic_read(&sk->sk_rmem_alloc);
+	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+	if (((rmem + skb->truesize) <= rcvbuf) &&
 	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 		netlink_skb_set_owner_r(skb, sk);
 		__netlink_sendskb(sk, skb);
-		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+		rmem = atomic_read(&sk->sk_rmem_alloc);
+		rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+		return rmem > (rcvbuf >> 1);
 	}
 	return -1;
 }
@@ -1896,6 +1908,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	size_t copied, max_recvmsg_len;
 	struct sk_buff *skb, *data_skb;
 	int err, ret;
+	unsigned int rmem, rcvbuf;
 
 	if (flags & MSG_OOB)
 		return -EOPNOTSUPP;
@@ -1960,12 +1973,15 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	skb_free_datagram(sk, skb);
 
-	if (READ_ONCE(nlk->cb_running) &&
-	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
-		ret = netlink_dump(sk, false);
-		if (ret) {
-			WRITE_ONCE(sk->sk_err, -ret);
-			sk_error_report(sk);
+	if (READ_ONCE(nlk->cb_running)) {
+		rmem = atomic_read(&sk->sk_rmem_alloc);
+		rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+		if (rmem <= (rcvbuf >> 1)) {
+			ret = netlink_dump(sk, false);
+			if (ret) {
+				WRITE_ONCE(sk->sk_err, -ret);
+				sk_error_report(sk);
+			}
 		}
 	}
 
@@ -2250,6 +2266,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	int err = -ENOBUFS;
 	int alloc_min_size;
 	int alloc_size;
+	unsigned int rmem, rcvbuf;
 
 	if (!lock_taken)
 		mutex_lock(&nlk->nl_cb_mutex);
@@ -2258,9 +2275,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 		goto errout_skb;
 	}
 
-	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		goto errout_skb;
-
 	/* NLMSG_GOODSIZE is small to avoid high order allocations being
 	 * required, but it makes sense to _attempt_ a 32KiB allocation
 	 * to reduce number of system calls on dump operations, if user
@@ -2283,6 +2297,11 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	if (!skb)
 		goto errout_skb;
 
+	rmem = atomic_read(&sk->sk_rmem_alloc);
+	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+	if ((rmem + skb->truesize) > rcvbuf)
+		goto errout_skb;
+
 	/* Trim skb to allocated size. User is expected to provide buffer as
 	 * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
 	 * netlink_recvmsg())). dump will pack as many smaller messages as
--
2.25.1