[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iKc_7RNordD-YcZv9DPw8CNubnDVkhgYGma20q4cxgAdw@mail.gmail.com>
Date: Mon, 7 Apr 2025 13:21:11 +0200
From: Eric Dumazet <edumazet@...gle.com>
To: Kuniyuki Iwashima <kuniyu@...zon.com>
Cc: "David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Willem de Bruijn <willemb@...gle.com>, Simon Horman <horms@...nel.org>,
Peter Zijlstra <peterz@...radead.org>, Steve French <sfrench@...ba.org>,
Enzo Matsumiya <ematsumiya@...e.de>, Wang Zhaolong <wangzhaolong1@...wei.com>,
Kuniyuki Iwashima <kuni1840@...il.com>, netdev@...r.kernel.org, stable@...r.kernel.org
Subject: Re: [PATCH v2 net] net: Fix null-ptr-deref by sock_lock_init_class_and_name()
and rmmod.
On Thu, Apr 3, 2025 at 10:36 PM Kuniyuki Iwashima <kuniyu@...zon.com> wrote:
>
> When I ran the repro [0] and waited a few seconds, I observed two
> LOCKDEP splats: a warning immediately followed by a null-ptr-deref. [1]
>
> Reproduction Steps:
>
> 1) Mount CIFS
> 2) Add an iptables rule to drop incoming FIN packets for CIFS
> 3) Unmount CIFS
> 4) Unload the CIFS module
> 5) Remove the iptables rule
>
> At step 3), the CIFS module calls sock_release() for the underlying
> TCP socket, and it returns quickly. However, the socket remains in
> FIN_WAIT_1 because incoming FIN packets are dropped.
>
> At this point, the module's refcnt is 0 while the socket is still
> alive, so the following rmmod command succeeds.
>
> # ss -tan
> State Recv-Q Send-Q Local Address:Port Peer Address:Port
> FIN-WAIT-1 0 477 10.0.2.15:51062 10.0.0.137:445
>
> # lsmod | grep cifs
> cifs 1159168 0
>
> This highlights a discrepancy between the lifetime of the CIFS module
> and the underlying TCP socket. Even after CIFS calls sock_release()
> and it returns, the TCP socket does not die immediately in order to
> close the connection gracefully.
>
> While this is generally fine, it causes an issue with LOCKDEP because
> CIFS assigns a different lock class to the TCP socket's sk->sk_lock
> using sock_lock_init_class_and_name().
>
> Once an incoming packet is processed for the socket or a timer fires,
> sk->sk_lock is acquired.
>
> Then, LOCKDEP checks the lock context in check_wait_context(), where
> hlock_class() is called to retrieve the lock class. However, since
> the module has already been unloaded, hlock_class() logs a warning
> and returns NULL, triggering the null-ptr-deref.
>
> I
>
> Fixes: ed07536ed673 ("[PATCH] lockdep: annotate nfs/nfsd in-kernel sockets")
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>
> Cc: stable@...r.kernel.org
> ---
> v2:
> * Clear sk_owner in sock_lock_init()
> * Define helper under the same #if guard
> * Remove redundant null check before module_put()
>
> v1: https://lore.kernel.org/netdev/20250403020837.51664-1-kuniyu@amazon.com/
> ---
> include/net/sock.h | 38 ++++++++++++++++++++++++++++++++++++--
> net/core/sock.c | 4 ++++
> 2 files changed, 40 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 8daf1b3b12c6..4216d7d86150 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -547,6 +547,10 @@ struct sock {
> struct rcu_head sk_rcu;
> netns_tracker ns_tracker;
> struct xarray sk_user_frags;
> +
> +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
> + struct module *sk_owner;
> +#endif
> };
>
> struct sock_bh_locked {
> @@ -1583,6 +1587,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
> sk_mem_reclaim(sk);
> }
>
> +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
> +static inline void sk_owner_set(struct sock *sk, struct module *owner)
> +{
> + __module_get(owner);
> + sk->sk_owner = owner;
> +}
> +
> +static inline void sk_owner_clear(struct sock *sk)
> +{
> + sk->sk_owner = NULL;
> +}
> +
> +static inline void sk_owner_put(struct sock *sk)
> +{
> + module_put(sk->sk_owner);
> +}
> +#else
> +static inline void sk_owner_set(struct sock *sk, struct module *owner)
> +{
> +}
> +
> +static inline void sk_owner_clear(struct sock *sk)
> +{
> +}
> +
> +static inline void sk_owner_put(struct sock *sk)
> +{
> +}
> +#endif
> /*
> * Macro so as to not evaluate some arguments when
> * lockdep is not enabled.
> @@ -1592,13 +1625,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
> */
> #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \
> do { \
> + sk_owner_set(sk, THIS_MODULE); \
> sk->sk_lock.owned = 0; \
> init_waitqueue_head(&sk->sk_lock.wq); \
> spin_lock_init(&(sk)->sk_lock.slock); \
> debug_check_no_locks_freed((void *)&(sk)->sk_lock, \
> - sizeof((sk)->sk_lock)); \
> + sizeof((sk)->sk_lock)); \
> lockdep_set_class_and_name(&(sk)->sk_lock.slock, \
> - (skey), (sname)); \
> + (skey), (sname)); \
> lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
> } while (0)
>
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 323892066def..d426c5f8e20f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2130,6 +2130,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
> */
> static inline void sock_lock_init(struct sock *sk)
> {
> + sk_owner_clear(sk);
> +
> if (sk->sk_kern_sock)
> sock_lock_init_class_and_name(
> sk,
> @@ -2324,6 +2326,8 @@ static void __sk_destruct(struct rcu_head *head)
> __netns_tracker_free(net, &sk->ns_tracker, false);
> net_passive_dec(net);
> }
> +
> + sk_owner_put(sk);
I am not convinced that the socket lock can be used after this point,
now or in the future.
> sk_prot_free(sk->sk_prot_creator, sk);
> }
Maybe move this in sk_prot_free() instead ?
diff --git a/net/core/sock.c b/net/core/sock.c
index 323892066def..9ab149d1584c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2226,6 +2226,9 @@ static void sk_prot_free(struct proto *prot,
struct sock *sk)
cgroup_sk_free(&sk->sk_cgrp_data);
mem_cgroup_sk_free(sk);
security_sk_free(sk);
+
+ sk_owner_put(sk);
+
if (slab != NULL)
kmem_cache_free(slab, sk);
else
Powered by blists - more mailing lists