lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Fri, 1 Jun 2007 13:34:30 +0200
From:	Frederik Deweerdt <deweerdt@...e.fr>
To:	David Miller <davem@...emloft.net>
Cc:	netdev@...r.kernel.org, eparis@...hat.com
Subject: Re: [Oops] unix_dgram_connect locking problem?

On Thu, May 31, 2007 at 03:22:27PM -0700, David Miller wrote:
> From: Frederik Deweerdt <deweerdt@...e.fr>
> Date: Fri, 11 May 2007 17:00:14 +0200
> 
> > I'm seeing an Oops[1] with a 2.6.19.2 kernel:
> 
> Frederik, I finally was able to spend some quality time on
> this issue today.  Sorry for taking so long.

Thank you for handling this David. I going to be offline for two weeks,
but I'll be able to report tests results after that.

Regards,
Frederik

> I came up with a series of two patches, the first one makes
> the locking easier to understand by fixing the unix_sock_*()
> lock macro names.
> 
> The second patch fixes this race conditon by properly holding
> both socket locks, being careful to avoid deadlocks, and
> checking for possible SOCK_DEAD state.
> 
> --------------------
> commit d410b81b4eef2e4409f9c38ef201253fbbcc7d94
> Author: David S. Miller <davem@...set.davemloft.net>
> Date:   Thu May 31 13:24:26 2007 -0700
> 
>     [AF_UNIX]: Make socket locking much less confusing.
>     
>     The unix_state_*() locking macros imply that there is some
>     rwlock kind of thing going on, but the implementation is
>     actually a spinlock which makes the code more confusing than
>     it needs to be.
>     
>     So use plain unix_state_lock and unix_state_unlock.
>     
>     Signed-off-by: David S. Miller <davem@...emloft.net>
> 
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index c0398f5..65f49fd 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -62,13 +62,11 @@ struct unix_skb_parms {
>  #define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
>  #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
>  
> -#define unix_state_rlock(s)	spin_lock(&unix_sk(s)->lock)
> -#define unix_state_runlock(s)	spin_unlock(&unix_sk(s)->lock)
> -#define unix_state_wlock(s)	spin_lock(&unix_sk(s)->lock)
> -#define unix_state_wlock_nested(s) \
> +#define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
> +#define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
> +#define unix_state_lock_nested(s) \
>  				spin_lock_nested(&unix_sk(s)->lock, \
>  				SINGLE_DEPTH_NESTING)
> -#define unix_state_wunlock(s)	spin_unlock(&unix_sk(s)->lock)
>  
>  #ifdef __KERNEL__
>  /* The AF_UNIX socket */
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index fc12ba5..453ede8 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -174,11 +174,11 @@ static struct sock *unix_peer_get(struct sock *s)
>  {
>  	struct sock *peer;
>  
> -	unix_state_rlock(s);
> +	unix_state_lock(s);
>  	peer = unix_peer(s);
>  	if (peer)
>  		sock_hold(peer);
> -	unix_state_runlock(s);
> +	unix_state_unlock(s);
>  	return peer;
>  }
>  
> @@ -369,7 +369,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
>  	unix_remove_socket(sk);
>  
>  	/* Clear state */
> -	unix_state_wlock(sk);
> +	unix_state_lock(sk);
>  	sock_orphan(sk);
>  	sk->sk_shutdown = SHUTDOWN_MASK;
>  	dentry	     = u->dentry;
> @@ -378,7 +378,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
>  	u->mnt	     = NULL;
>  	state = sk->sk_state;
>  	sk->sk_state = TCP_CLOSE;
> -	unix_state_wunlock(sk);
> +	unix_state_unlock(sk);
>  
>  	wake_up_interruptible_all(&u->peer_wait);
>  
> @@ -386,12 +386,12 @@ static int unix_release_sock (struct sock *sk, int embrion)
>  
>  	if (skpair!=NULL) {
>  		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
> -			unix_state_wlock(skpair);
> +			unix_state_lock(skpair);
>  			/* No more writes */
>  			skpair->sk_shutdown = SHUTDOWN_MASK;
>  			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
>  				skpair->sk_err = ECONNRESET;
> -			unix_state_wunlock(skpair);
> +			unix_state_unlock(skpair);
>  			skpair->sk_state_change(skpair);
>  			read_lock(&skpair->sk_callback_lock);
>  			sk_wake_async(skpair,1,POLL_HUP);
> @@ -448,7 +448,7 @@ static int unix_listen(struct socket *sock, int backlog)
>  	err = -EINVAL;
>  	if (!u->addr)
>  		goto out;			/* No listens on an unbound socket */
> -	unix_state_wlock(sk);
> +	unix_state_lock(sk);
>  	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
>  		goto out_unlock;
>  	if (backlog > sk->sk_max_ack_backlog)
> @@ -462,7 +462,7 @@ static int unix_listen(struct socket *sock, int backlog)
>  	err = 0;
>  
>  out_unlock:
> -	unix_state_wunlock(sk);
> +	unix_state_unlock(sk);
>  out:
>  	return err;
>  }
> @@ -881,7 +881,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  		if (!other)
>  			goto out;
>  
> -		unix_state_wlock(sk);
> +		unix_state_lock(sk);
>  
>  		err = -EPERM;
>  		if (!unix_may_send(sk, other))
> @@ -896,7 +896,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  		 *	1003.1g breaking connected state with AF_UNSPEC
>  		 */
>  		other = NULL;
> -		unix_state_wlock(sk);
> +		unix_state_lock(sk);
>  	}
>  
>  	/*
> @@ -905,19 +905,19 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  	if (unix_peer(sk)) {
>  		struct sock *old_peer = unix_peer(sk);
>  		unix_peer(sk)=other;
> -		unix_state_wunlock(sk);
> +		unix_state_unlock(sk);
>  
>  		if (other != old_peer)
>  			unix_dgram_disconnected(sk, old_peer);
>  		sock_put(old_peer);
>  	} else {
>  		unix_peer(sk)=other;
> -		unix_state_wunlock(sk);
> +		unix_state_unlock(sk);
>  	}
>  	return 0;
>  
>  out_unlock:
> -	unix_state_wunlock(sk);
> +	unix_state_unlock(sk);
>  	sock_put(other);
>  out:
>  	return err;
> @@ -936,7 +936,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
>  		(skb_queue_len(&other->sk_receive_queue) >
>  		 other->sk_max_ack_backlog);
>  
> -	unix_state_runlock(other);
> +	unix_state_unlock(other);
>  
>  	if (sched)
>  		timeo = schedule_timeout(timeo);
> @@ -994,11 +994,11 @@ restart:
>  		goto out;
>  
>  	/* Latch state of peer */
> -	unix_state_rlock(other);
> +	unix_state_lock(other);
>  
>  	/* Apparently VFS overslept socket death. Retry. */
>  	if (sock_flag(other, SOCK_DEAD)) {
> -		unix_state_runlock(other);
> +		unix_state_unlock(other);
>  		sock_put(other);
>  		goto restart;
>  	}
> @@ -1048,18 +1048,18 @@ restart:
>  		goto out_unlock;
>  	}
>  
> -	unix_state_wlock_nested(sk);
> +	unix_state_lock_nested(sk);
>  
>  	if (sk->sk_state != st) {
> -		unix_state_wunlock(sk);
> -		unix_state_runlock(other);
> +		unix_state_unlock(sk);
> +		unix_state_unlock(other);
>  		sock_put(other);
>  		goto restart;
>  	}
>  
>  	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
>  	if (err) {
> -		unix_state_wunlock(sk);
> +		unix_state_unlock(sk);
>  		goto out_unlock;
>  	}
>  
> @@ -1096,7 +1096,7 @@ restart:
>  	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
>  	unix_peer(sk)	= newsk;
>  
> -	unix_state_wunlock(sk);
> +	unix_state_unlock(sk);
>  
>  	/* take ten and and send info to listening sock */
>  	spin_lock(&other->sk_receive_queue.lock);
> @@ -1105,14 +1105,14 @@ restart:
>  	 * is installed to listening socket. */
>  	atomic_inc(&newu->inflight);
>  	spin_unlock(&other->sk_receive_queue.lock);
> -	unix_state_runlock(other);
> +	unix_state_unlock(other);
>  	other->sk_data_ready(other, 0);
>  	sock_put(other);
>  	return 0;
>  
>  out_unlock:
>  	if (other)
> -		unix_state_runlock(other);
> +		unix_state_unlock(other);
>  
>  out:
>  	if (skb)
> @@ -1178,10 +1178,10 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
>  	wake_up_interruptible(&unix_sk(sk)->peer_wait);
>  
>  	/* attach accepted sock to socket */
> -	unix_state_wlock(tsk);
> +	unix_state_lock(tsk);
>  	newsock->state = SS_CONNECTED;
>  	sock_graft(tsk, newsock);
> -	unix_state_wunlock(tsk);
> +	unix_state_unlock(tsk);
>  	return 0;
>  
>  out:
> @@ -1208,7 +1208,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
>  	}
>  
>  	u = unix_sk(sk);
> -	unix_state_rlock(sk);
> +	unix_state_lock(sk);
>  	if (!u->addr) {
>  		sunaddr->sun_family = AF_UNIX;
>  		sunaddr->sun_path[0] = 0;
> @@ -1219,7 +1219,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
>  		*uaddr_len = addr->len;
>  		memcpy(sunaddr, addr->name, *uaddr_len);
>  	}
> -	unix_state_runlock(sk);
> +	unix_state_unlock(sk);
>  	sock_put(sk);
>  out:
>  	return err;
> @@ -1337,7 +1337,7 @@ restart:
>  			goto out_free;
>  	}
>  
> -	unix_state_rlock(other);
> +	unix_state_lock(other);
>  	err = -EPERM;
>  	if (!unix_may_send(sk, other))
>  		goto out_unlock;
> @@ -1347,20 +1347,20 @@ restart:
>  		 *	Check with 1003.1g - what should
>  		 *	datagram error
>  		 */
> -		unix_state_runlock(other);
> +		unix_state_unlock(other);
>  		sock_put(other);
>  
>  		err = 0;
> -		unix_state_wlock(sk);
> +		unix_state_lock(sk);
>  		if (unix_peer(sk) == other) {
>  			unix_peer(sk)=NULL;
> -			unix_state_wunlock(sk);
> +			unix_state_unlock(sk);
>  
>  			unix_dgram_disconnected(sk, other);
>  			sock_put(other);
>  			err = -ECONNREFUSED;
>  		} else {
> -			unix_state_wunlock(sk);
> +			unix_state_unlock(sk);
>  		}
>  
>  		other = NULL;
> @@ -1397,14 +1397,14 @@ restart:
>  	}
>  
>  	skb_queue_tail(&other->sk_receive_queue, skb);
> -	unix_state_runlock(other);
> +	unix_state_unlock(other);
>  	other->sk_data_ready(other, len);
>  	sock_put(other);
>  	scm_destroy(siocb->scm);
>  	return len;
>  
>  out_unlock:
> -	unix_state_runlock(other);
> +	unix_state_unlock(other);
>  out_free:
>  	kfree_skb(skb);
>  out:
> @@ -1494,14 +1494,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
>  			goto out_err;
>  		}
>  
> -		unix_state_rlock(other);
> +		unix_state_lock(other);
>  
>  		if (sock_flag(other, SOCK_DEAD) ||
>  		    (other->sk_shutdown & RCV_SHUTDOWN))
>  			goto pipe_err_free;
>  
>  		skb_queue_tail(&other->sk_receive_queue, skb);
> -		unix_state_runlock(other);
> +		unix_state_unlock(other);
>  		other->sk_data_ready(other, size);
>  		sent+=size;
>  	}
> @@ -1512,7 +1512,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
>  	return sent;
>  
>  pipe_err_free:
> -	unix_state_runlock(other);
> +	unix_state_unlock(other);
>  	kfree_skb(skb);
>  pipe_err:
>  	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
> @@ -1641,7 +1641,7 @@ static long unix_stream_data_wait(struct sock * sk, long timeo)
>  {
>  	DEFINE_WAIT(wait);
>  
> -	unix_state_rlock(sk);
> +	unix_state_lock(sk);
>  
>  	for (;;) {
>  		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
> @@ -1654,14 +1654,14 @@ static long unix_stream_data_wait(struct sock * sk, long timeo)
>  			break;
>  
>  		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
> -		unix_state_runlock(sk);
> +		unix_state_unlock(sk);
>  		timeo = schedule_timeout(timeo);
> -		unix_state_rlock(sk);
> +		unix_state_lock(sk);
>  		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
>  	}
>  
>  	finish_wait(sk->sk_sleep, &wait);
> -	unix_state_runlock(sk);
> +	unix_state_unlock(sk);
>  	return timeo;
>  }
>  
> @@ -1816,12 +1816,12 @@ static int unix_shutdown(struct socket *sock, int mode)
>  	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
>  
>  	if (mode) {
> -		unix_state_wlock(sk);
> +		unix_state_lock(sk);
>  		sk->sk_shutdown |= mode;
>  		other=unix_peer(sk);
>  		if (other)
>  			sock_hold(other);
> -		unix_state_wunlock(sk);
> +		unix_state_unlock(sk);
>  		sk->sk_state_change(sk);
>  
>  		if (other &&
> @@ -1833,9 +1833,9 @@ static int unix_shutdown(struct socket *sock, int mode)
>  				peer_mode |= SEND_SHUTDOWN;
>  			if (mode&SEND_SHUTDOWN)
>  				peer_mode |= RCV_SHUTDOWN;
> -			unix_state_wlock(other);
> +			unix_state_lock(other);
>  			other->sk_shutdown |= peer_mode;
> -			unix_state_wunlock(other);
> +			unix_state_unlock(other);
>  			other->sk_state_change(other);
>  			read_lock(&other->sk_callback_lock);
>  			if (peer_mode == SHUTDOWN_MASK)
> @@ -1973,7 +1973,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
>  	else {
>  		struct sock *s = v;
>  		struct unix_sock *u = unix_sk(s);
> -		unix_state_rlock(s);
> +		unix_state_lock(s);
>  
>  		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
>  			s,
> @@ -2001,7 +2001,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
>  			for ( ; i < len; i++)
>  				seq_putc(seq, u->addr->name->sun_path[i]);
>  		}
> -		unix_state_runlock(s);
> +		unix_state_unlock(s);
>  		seq_putc(seq, '\n');
>  	}
>  
> --------------------
> commit 19fec3e807a487415e77113cb9dbdaa2da739836
> Author: David S. Miller <davem@...set.davemloft.net>
> Date:   Thu May 31 15:19:20 2007 -0700
> 
>     [AF_UNIX]: Fix datagram connect race causing an OOPS.
>     
>     Based upon an excellent bug report and initial patch by
>     Frederik Deweerdt.
>     
>     The UNIX datagram connect code blindly dereferences other->sk_socket
>     via the call down to the security_unix_may_send() function.
>     
>     Without locking 'other' that pointer can go NULL via unix_release_sock()
>     which does sock_orphan() which also marks the socket SOCK_DEAD.
>     
>     So we have to lock both 'sk' and 'other' yet avoid all kinds of
>     potential deadlocks (connect to self is OK for datagram sockets and it
>     is possible for two datagram sockets to perform a simultaneous connect
>     to each other).  So what we do is have a "double lock" function similar
>     to how we handle this situation in other areas of the kernel.  We take
>     the lock of the socket pointer with the smallest address first in
>     order to avoid ABBA style deadlocks.
>     
>     Once we have them both locked, we check to see if SOCK_DEAD is set
>     for 'other' and if so, drop everything and retry the lookup.
>     
>     Signed-off-by: David S. Miller <davem@...emloft.net>
> 
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 453ede8..87c794d 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -858,6 +858,31 @@ out_mknod_parent:
>  	goto out_up;
>  }
>  
> +static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
> +{
> +	if (unlikely(sk1 == sk2) || !sk2) {
> +		unix_state_lock(sk1);
> +		return;
> +	}
> +	if (sk1 < sk2) {
> +		unix_state_lock(sk1);
> +		unix_state_lock_nested(sk2);
> +	} else {
> +		unix_state_lock(sk2);
> +		unix_state_lock_nested(sk1);
> +	}
> +}
> +
> +static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
> +{
> +	if (unlikely(sk1 == sk2) || !sk2) {
> +		unix_state_unlock(sk1);
> +		return;
> +	}
> +	unix_state_unlock(sk1);
> +	unix_state_unlock(sk2);
> +}
> +
>  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  			      int alen, int flags)
>  {
> @@ -877,11 +902,19 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
>  			goto out;
>  
> +restart:
>  		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
>  		if (!other)
>  			goto out;
>  
> -		unix_state_lock(sk);
> +		unix_state_double_lock(sk, other);
> +
> +		/* Apparently VFS overslept socket death. Retry. */
> +		if (sock_flag(other, SOCK_DEAD)) {
> +			unix_state_double_unlock(sk, other);
> +			sock_put(other);
> +			goto restart;
> +		}
>  
>  		err = -EPERM;
>  		if (!unix_may_send(sk, other))
> @@ -896,7 +929,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  		 *	1003.1g breaking connected state with AF_UNSPEC
>  		 */
>  		other = NULL;
> -		unix_state_lock(sk);
> +		unix_state_double_lock(sk, other);
>  	}
>  
>  	/*
> @@ -905,19 +938,19 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
>  	if (unix_peer(sk)) {
>  		struct sock *old_peer = unix_peer(sk);
>  		unix_peer(sk)=other;
> -		unix_state_unlock(sk);
> +		unix_state_double_unlock(sk, other);
>  
>  		if (other != old_peer)
>  			unix_dgram_disconnected(sk, old_peer);
>  		sock_put(old_peer);
>  	} else {
>  		unix_peer(sk)=other;
> -		unix_state_unlock(sk);
> +		unix_state_double_unlock(sk, other);
>  	}
>  	return 0;
>  
>  out_unlock:
> -	unix_state_unlock(sk);
> +	unix_state_double_unlock(sk, other);
>  	sock_put(other);
>  out:
>  	return err;
> --------------------
> 
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ