[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1285184088.2380.18.camel@edumazet-laptop>
Date: Wed, 22 Sep 2010 21:34:48 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>,
David Miller <davem@...emloft.net>
Cc: linux-fsdevel@...r.kernel.org, netdev@...r.kernel.org
Subject: [PATCH] net: fix a lockdep splat
Le mercredi 22 septembre 2010 à 19:35 +0200, Eric Dumazet a écrit :
> Thanks !
>
> We have for each socket :
>
> One spinlock (sk_slock.slock)
> One rwlock (sk_callback_lock)
>
> It is legal to use :
>
> A) (this is used in net/sunrpc/xprtsock.c)
> read_lock(&sk->sk_callback_lock) (without blocking BH)
> <BH>
> spin_lock(&sk->sk_slock.slock);
> ...
> read_lock(&sk->sk_callback_lock);
> ...
>
> It's also legal to do
>
> B)
> write_lock_bh(&sk->sk_callback_lock)
> stuff
> write_unlock_bh(&sk->sk_callback_lock)
>
>
> But if we have a path that :
>
> C)
> spin_lock_bh(&sk->sk_slock)
> ...
> write_lock_bh(&sk->sk_callback_lock)
> stuff
> write_unlock_bh(&sk->sk_callback_lock)
>
> Then we can have a deadlock with A)
>
> CPU1 [A] CPU2 [C]
> read_lock(&sk->sk_callback_lock)
> <BH> spin_lock_bh(&sk->sk_slock)
> <wait to spin_lock(slock)>
> <wait to write_lock_bh(callback_lock)>
>
> We have one such path C) in inet_csk_listen_stop() :
>
> local_bh_disable();
> bh_lock_sock(child); // spin_lock_bh(&sk->sk_slock)
> WARN_ON(sock_owned_by_user(child));
> ...
> sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock)
>
> This is a false positive because it's not possible that this particular
> deadlock can occur, since inet_csk_listen_stop() manipulates half-open
> sockets (not yet given to a listener)
>
> Give me a moment to think about it and write a fix.
>
>
Could you try following patch ?
Thanks !
[PATCH] net: fix a lockdep splat
We have for each socket :
One spinlock (sk_slock.slock)
One rwlock (sk_callback_lock)
Possible scenarios are :
(A) (this is used in net/sunrpc/xprtsock.c)
read_lock(&sk->sk_callback_lock) (without blocking BH)
<BH>
spin_lock(&sk->sk_slock.slock);
...
read_lock(&sk->sk_callback_lock);
...
(B)
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
(C)
spin_lock_bh(&sk->sk_slock)
...
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
spin_unlock_bh(&sk->sk_slock)
This (C) case conflicts with (A) :
CPU1 [A] CPU2 [C]
read_lock(callback_lock)
<BH> spin_lock_bh(slock)
<wait to spin_lock(slock)>
<wait to write_lock_bh(callback_lock)>
We have one problematic (C) use case in inet_csk_listen_stop() :
local_bh_disable();
bh_lock_sock(child); // spin_lock_bh(&sk->sk_slock)
WARN_ON(sock_owned_by_user(child));
...
sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock)
lockdep is not happy with this, as reported by Tetsuo Handa
This patch makes sure inet_csk_listen_stop() uses following lock order :
write_lock_bh(&sk->sk_callback_lock)
spin_lock(&sk->sk_slock)
...
spin_unlock(&sk->sk_slock)
write_unlock_bh(&sk->sk_callback_lock)
Reported-by: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
include/net/sock.h | 18 +++++++++++++++---
net/ipv4/inet_connection_sock.c | 7 ++++---
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index adab9dc..b6c60d5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1242,6 +1242,14 @@ static inline wait_queue_head_t *sk_sleep(struct sock *sk)
{
return &sk->sk_wq->wait;
}
+
+static inline void __sock_orphan(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_DEAD);
+ sk_set_socket(sk, NULL);
+ sk->sk_wq = NULL;
+}
+
/* Detach socket from process context.
* Announce socket dead, detach it from wait queue and inode.
* Note that parent inode held reference count on this struct sock,
@@ -1251,15 +1259,19 @@ static inline wait_queue_head_t *sk_sleep(struct sock *sk)
*/
static inline void sock_orphan(struct sock *sk)
{
+#ifdef CONFIG_PROVE_LOCKING
+ WARN_ON(lockdep_is_held(&sk->sk_lock.slock));
+#endif
write_lock_bh(&sk->sk_callback_lock);
- sock_set_flag(sk, SOCK_DEAD);
- sk_set_socket(sk, NULL);
- sk->sk_wq = NULL;
+ __sock_orphan(sk);
write_unlock_bh(&sk->sk_callback_lock);
}
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
+#ifdef CONFIG_PROVE_LOCKING
+ WARN_ON(lockdep_is_held(&sk->sk_lock.slock));
+#endif
write_lock_bh(&sk->sk_callback_lock);
rcu_assign_pointer(sk->sk_wq, parent->wq);
parent->sk = sk;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7174370..c65df13 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -685,21 +685,22 @@ void inet_csk_listen_stop(struct sock *sk)
acc_req = req->dl_next;
- local_bh_disable();
+ write_lock_bh(&child->sk_callback_lock);
+
bh_lock_sock(child);
WARN_ON(sock_owned_by_user(child));
sock_hold(child);
sk->sk_prot->disconnect(child, O_NONBLOCK);
- sock_orphan(child);
+ __sock_orphan(child);
percpu_counter_inc(sk->sk_prot->orphan_count);
inet_csk_destroy_sock(child);
bh_unlock_sock(child);
- local_bh_enable();
+ write_unlock_bh(&child->sk_callback_lock);
sock_put(child);
sk_acceptq_removed(sk);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists