[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9794af18-4905-46c6-b12c-365ea2f05858@samsung.com>
Date: Thu, 10 Jul 2025 10:34:00 +0200
From: Marek Szyprowski <m.szyprowski@...sung.com>
To: Kuniyuki Iwashima <kuniyu@...gle.com>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
<kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, Kuniyuki Iwashima <kuni1840@...il.com>,
netdev@...r.kernel.org, Jason Baron <jbaron@...mai.com>
Subject: Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
On 04.07.2025 07:48, Kuniyuki Iwashima wrote:
> Netlink has this pattern in some places
>
> if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
> atomic_add(skb->truesize, &sk->sk_rmem_alloc);
>
> , which has the same problem fixed by commit 5a465a0da13e ("udp:
> Fix multiple wraparounds of sk->sk_rmem_alloc.").
>
> For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
> is always false as the two operands are of int.
>
> Then, a single socket can eat as many skb as possible until OOM
> happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
>
> Let's fix it by using atomic_add_return() and comparing the two
> variables as unsigned int.
>
> Before:
> [root@...ora ~]# ss -f netlink
> Recv-Q Send-Q Local Address:Port Peer Address:Port
> -1668710080 0 rtnl:nl_wraparound/293 *
>
> After:
> [root@...ora ~]# ss -f netlink
> Recv-Q Send-Q Local Address:Port Peer Address:Port
> 2147483072 0 rtnl:nl_wraparound/290 *
> ^
> `--- INT_MAX - 576
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Reported-by: Jason Baron <jbaron@...mai.com>
> Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...gle.com>
This patch landed recently in linux-next as commit ae8f160e7eb2
("netlink: Fix wraparounds of sk->sk_rmem_alloc."). In my tests I found
that it breaks wifi driver operation on my test boards (various 32-bit
and 64-bit ARM ones). Reverting it on top of next-20250709 fixes this
issue. Here is the log from the failure observed on the Samsung
Peach-Pit Chromebook:
# dmesg | grep wifi
[ 16.174311] mwifiex_sdio mmc2:0001:1: WLAN is not the winner! Skip FW
dnld
[ 16.503969] mwifiex_sdio mmc2:0001:1: WLAN FW is active
[ 16.574635] mwifiex_sdio mmc2:0001:1: host_mlme: disable, key_api: 2
[ 16.586152] mwifiex_sdio mmc2:0001:1: CMD_RESP: cmd 0x242 error,
result=0x2
[ 16.641184] mwifiex_sdio mmc2:0001:1: info: MWIFIEX VERSION: mwifiex
1.0 (15.68.7.p87)
[ 16.649474] mwifiex_sdio mmc2:0001:1: driver_version = mwifiex 1.0
(15.68.7.p87)
[ 25.953285] mwifiex_sdio mmc2:0001:1 wlan0: renamed from mlan0
# ifconfig wlan0 up
# iw wlan0 scan
command failed: No buffer space available (-105)
#
Let me know if you need more information to debug this issue.
> ---
> net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++----------------
> 1 file changed, 49 insertions(+), 32 deletions(-)
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index e8972a857e51..79fbaf7333ce 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
> WARN_ON(skb->sk != NULL);
> skb->sk = sk;
> skb->destructor = netlink_skb_destructor;
> - atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> sk_mem_charge(sk, skb->truesize);
> }
>
> @@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
> int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
> long *timeo, struct sock *ssk)
> {
> + DECLARE_WAITQUEUE(wait, current);
> struct netlink_sock *nlk;
> + unsigned int rmem;
>
> nlk = nlk_sk(sk);
> + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
>
> - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
> - test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
> - DECLARE_WAITQUEUE(wait, current);
> - if (!*timeo) {
> - if (!ssk || netlink_is_kernel(ssk))
> - netlink_overrun(sk);
> - sock_put(sk);
> - kfree_skb(skb);
> - return -EAGAIN;
> - }
> -
> - __set_current_state(TASK_INTERRUPTIBLE);
> - add_wait_queue(&nlk->wait, &wait);
> + if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
> + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
> + netlink_skb_set_owner_r(skb, sk);
> + return 0;
> + }
>
> - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
> - test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
> - !sock_flag(sk, SOCK_DEAD))
> - *timeo = schedule_timeout(*timeo);
> + atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
>
> - __set_current_state(TASK_RUNNING);
> - remove_wait_queue(&nlk->wait, &wait);
> + if (!*timeo) {
> + if (!ssk || netlink_is_kernel(ssk))
> + netlink_overrun(sk);
> sock_put(sk);
> + kfree_skb(skb);
> + return -EAGAIN;
> + }
>
> - if (signal_pending(current)) {
> - kfree_skb(skb);
> - return sock_intr_errno(*timeo);
> - }
> - return 1;
> + __set_current_state(TASK_INTERRUPTIBLE);
> + add_wait_queue(&nlk->wait, &wait);
> + rmem = atomic_read(&sk->sk_rmem_alloc);
> +
> + if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
> + test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
> + !sock_flag(sk, SOCK_DEAD))
> + *timeo = schedule_timeout(*timeo);
> +
> + __set_current_state(TASK_RUNNING);
> + remove_wait_queue(&nlk->wait, &wait);
> + sock_put(sk);
> +
> + if (signal_pending(current)) {
> + kfree_skb(skb);
> + return sock_intr_errno(*timeo);
> }
> - netlink_skb_set_owner_r(skb, sk);
> - return 0;
> +
> + return 1;
> }
>
> static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
> @@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
> ret = -ECONNREFUSED;
> if (nlk->netlink_rcv != NULL) {
> ret = skb->len;
> + atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> netlink_skb_set_owner_r(skb, sk);
> NETLINK_CB(skb).sk = ssk;
> netlink_deliver_tap_kernel(sk, ssk, skb);
> @@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
> static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
> {
> struct netlink_sock *nlk = nlk_sk(sk);
> + unsigned int rmem, rcvbuf;
>
> - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
> + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
> + rcvbuf = READ_ONCE(sk->sk_rcvbuf);
> +
> + if ((rmem != skb->truesize || rmem <= rcvbuf) &&
> !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
> netlink_skb_set_owner_r(skb, sk);
> __netlink_sendskb(sk, skb);
> - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
> + return rmem > (rcvbuf >> 1);
> }
> +
> + atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
> return -1;
> }
>
> @@ -2249,6 +2262,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
> struct module *module;
> int err = -ENOBUFS;
> int alloc_min_size;
> + unsigned int rmem;
> int alloc_size;
>
> if (!lock_taken)
> @@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
> goto errout_skb;
> }
>
> - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
> - goto errout_skb;
> -
> /* NLMSG_GOODSIZE is small to avoid high order allocations being
> * required, but it makes sense to _attempt_ a 32KiB allocation
> * to reduce number of system calls on dump operations, if user
> @@ -2283,6 +2294,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
> if (!skb)
> goto errout_skb;
>
> + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
> + if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
> + atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
> + goto errout_skb;
> + }
> +
> /* Trim skb to allocated size. User is expected to provide buffer as
> * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
> * netlink_recvmsg())). dump will pack as many smaller messages as
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Powered by blists - more mailing lists