Message-ID: <20211203013934.20645-1-kuniyu@amazon.co.jp>
Date: Fri, 3 Dec 2021 10:39:34 +0900
From: Kuniyuki Iwashima <kuniyu@...zon.co.jp>
To: <bigeasy@...utronix.de>
CC: <eric.dumazet@...il.com>, <kafai@...com>, <davem@...emloft.net>,
<dsahern@...nel.org>, <efault@....de>, <kuba@...nel.org>,
<netdev@...r.kernel.org>, <tglx@...utronix.de>,
<yoshfuji@...ux-ipv6.org>, Kuniyuki Iwashima <kuniyu@...zon.co.jp>
Subject: Re: [PATCH net-next] tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Date: Tue, 30 Nov 2021 15:23:02 +0100
> Commit
> 9652dc2eb9e40 ("tcp: relax listening_hash operations")
>
> removed the need to disable bottom halves while acquiring
> listening_hash.lock. There are still two callers left which disable
> bottom halves before the lock is acquired.
>
> On PREEMPT_RT, softirqs are preemptible and local_bh_disable() acts
> as a lock to ensure that resources that are protected by disabling
> bottom halves remain protected.
> This leads to a circular locking dependency if a lock is acquired once
> with bottom halves disabled and once with bottom halves enabled followed
> by disabling bottom halves, i.e. the two paths take the locks in
> reverse order.
> It has been observed with inet_listen_hashbucket::lock:
>
> local_bh_disable() + spin_lock(&ilb->lock):
> inet_listen()
> inet_csk_listen_start()
> sk->sk_prot->hash() := inet_hash()
> local_bh_disable()
> __inet_hash()
> spin_lock(&ilb->lock);
> acquire(&ilb->lock);
>
> Reverse order: spin_lock(&ilb->lock) + local_bh_disable():
> tcp_seq_next()
> listening_get_next()
> spin_lock(&ilb->lock);
> acquire(&ilb->lock);
>
> tcp4_seq_show()
> get_tcp4_sock()
> sock_i_ino()
> read_lock_bh(&sk->sk_callback_lock);
> acquire(softirq_ctrl) // <---- whoops
> acquire(&sk->sk_callback_lock)
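
To make the conflict above concrete: a minimal sketch, not from the
patch (hash_path()/seq_path() and the standalone ilb_lock are
hypothetical stand-ins for the real paths and ilb->lock), showing the
two orders that collide on PREEMPT_RT, where local_bh_disable() behaves
like taking a per-CPU softirq_ctrl lock:

    /* Sketch only; names are hypothetical. On PREEMPT_RT the two
     * functions below take the same two "locks" in opposite order,
     * which lockdep reports as a circular dependency.
     */
    #include <linux/bottom_half.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(ilb_lock);      /* stands in for ilb->lock */

    static void hash_path(void)            /* like the inet_hash() chain */
    {
            local_bh_disable();            /* "acquires" softirq_ctrl */
            spin_lock(&ilb_lock);          /* then ilb->lock */
            spin_unlock(&ilb_lock);
            local_bh_enable();
    }

    static void seq_path(void)             /* like the tcp_seq_next() chain */
    {
            spin_lock(&ilb_lock);          /* ilb->lock first */
            local_bh_disable();            /* then softirq_ctrl: reversed */
            local_bh_enable();
            spin_unlock(&ilb_lock);
    }
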
>
> Drop local_bh_disable() around __inet_hash(), which acquires
> listening_hash->lock. Split inet_unhash() so that the
> listen_hashbucket lock is acquired without disabling bottom halves,
> while the inet_ehash lock is still acquired with bottom halves
> disabled.
>
> Reported-by: Mike Galbraith <efault@....de>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
> Link: https://lkml.kernel.org/r/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de
> Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net
I think this patch is for the net tree and needs a Fixes: tag pointing to
the commit mentioned in the description.
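Based on the commit cited in the description, that would be:

    Fixes: 9652dc2eb9e40 ("tcp: relax listening_hash operations")
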
The patch itself looks good to me.
Acked-by: Kuniyuki Iwashima <kuniyu@...zon.co.jp>
Also, added Eric and Martin on CC.
> ---
> net/ipv4/inet_hashtables.c | 53 ++++++++++++++++++++++---------------
> net/ipv6/inet6_hashtables.c | 5 +---
> 2 files changed, 33 insertions(+), 25 deletions(-)
>
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index 75737267746f8..7bd1e10086f0a 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
> int err = 0;
>
> if (sk->sk_state != TCP_LISTEN) {
> + local_bh_disable();
> inet_ehash_nolisten(sk, osk, NULL);
> + local_bh_enable();
> return 0;
> }
> WARN_ON(!sk_unhashed(sk));
> @@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
> {
> int err = 0;
>
> - if (sk->sk_state != TCP_CLOSE) {
> - local_bh_disable();
> + if (sk->sk_state != TCP_CLOSE)
> err = __inet_hash(sk, NULL);
> - local_bh_enable();
> - }
>
> return err;
> }
> EXPORT_SYMBOL_GPL(inet_hash);
>
> -void inet_unhash(struct sock *sk)
> +static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
> {
> - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
> - struct inet_listen_hashbucket *ilb = NULL;
> - spinlock_t *lock;
> -
> if (sk_unhashed(sk))
> return;
>
> - if (sk->sk_state == TCP_LISTEN) {
> - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
> - lock = &ilb->lock;
> - } else {
> - lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
> - }
> - spin_lock_bh(lock);
> - if (sk_unhashed(sk))
> - goto unlock;
> -
> if (rcu_access_pointer(sk->sk_reuseport_cb))
> reuseport_stop_listen_sock(sk);
> if (ilb) {
> + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
> +
> inet_unhash2(hashinfo, sk);
> ilb->count--;
> }
> __sk_nulls_del_node_init_rcu(sk);
> sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
> -unlock:
> - spin_unlock_bh(lock);
> +}
> +
> +void inet_unhash(struct sock *sk)
> +{
> + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
> +
> + if (sk_unhashed(sk))
> + return;
> +
> + if (sk->sk_state == TCP_LISTEN) {
> + struct inet_listen_hashbucket *ilb;
> +
> + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
> + /* Don't disable bottom halves while acquiring the lock to
> + * avoid circular locking dependency on PREEMPT_RT.
> + */
> + spin_lock(&ilb->lock);
> + __inet_unhash(sk, ilb);
> + spin_unlock(&ilb->lock);
> + } else {
> + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
> +
> + spin_lock_bh(lock);
> + __inet_unhash(sk, NULL);
> + spin_unlock_bh(lock);
> + }
> }
> EXPORT_SYMBOL_GPL(inet_unhash);
>
> diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
> index 67c9114835c84..0a2e7f2283911 100644
> --- a/net/ipv6/inet6_hashtables.c
> +++ b/net/ipv6/inet6_hashtables.c
> @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
> {
> int err = 0;
>
> - if (sk->sk_state != TCP_CLOSE) {
> - local_bh_disable();
> + if (sk->sk_state != TCP_CLOSE)
> err = __inet_hash(sk, NULL);
> - local_bh_enable();
> - }
>
> return err;
> }
> --
> 2.34.1
>