lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <E1Hv980-0006gQ-00@dorka.pomaz.szeredi.hu>
Date:	Mon, 04 Jun 2007 11:45:32 +0200
From:	Miklos Szeredi <miklos@...redi.hu>
To:	netdev@...r.kernel.org
CC:	linux-kernel@...r.kernel.org
Subject: Re: [PATCH] fix race in AF_UNIX

> A recv() on an AF_UNIX, SOCK_STREAM socket can race with a
> send()+close() on the peer, causing recv() to return zero, even though
> the sent data should be received.
> 
> This happens if the send() and the close() is performed between
> skb_dequeue() and checking sk->sk_shutdown in unix_stream_recvmsg():
> 
> process A  skb_dequeue() returns NULL, there's no data in the socket queue
> process B  new data is inserted onto the queue by unix_stream_sendmsg()
> process B  sk->sk_shutdown is set to SHUTDOWN_MASK by unix_release_sock()
> process A  sk->sk_shutdown is checked, unix_release_sock() returns zero

This is only part of the story.  It turns out, there are other races
involving the garbage collector, that can throw away perfectly good
packets with AF_UNIX sockets in them.

The problems arise when a socket goes from installed to in-flight or
vica versa during garbage collection.  Since gc is done with a
spinlock held, this only shows up on SMP.

The following patch fixes it for me, but it's possibly the wrong
approach.

Signed-off-by: Miklos Szeredi <mszeredi@...e.cz>
---

Index: linux-2.6.22-rc2/net/unix/garbage.c
===================================================================
--- linux-2.6.22-rc2.orig/net/unix/garbage.c	2007-06-03 23:58:11.000000000 +0200
+++ linux-2.6.22-rc2/net/unix/garbage.c	2007-06-04 11:39:42.000000000 +0200
@@ -90,6 +90,7 @@
 static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */
 
 atomic_t unix_tot_inflight = ATOMIC_INIT(0);
+DECLARE_RWSEM(unix_gc_sem);
 
 
 static struct sock *unix_get_socket(struct file *filp)
@@ -169,7 +170,7 @@ static void maybe_unmark_and_push(struct
 
 void unix_gc(void)
 {
-	static DEFINE_MUTEX(unix_gc_sem);
+	static DEFINE_MUTEX(unix_gc_local_lock);
 	int i;
 	struct sock *s;
 	struct sk_buff_head hitlist;
@@ -179,9 +180,22 @@ void unix_gc(void)
 	 *	Avoid a recursive GC.
 	 */
 
-	if (!mutex_trylock(&unix_gc_sem))
+	if (!mutex_trylock(&unix_gc_local_lock))
 		return;
 
+
+	/*
+	 * unix_gc_sem protects against sockets going from in-flight to
+	 * installed
+	 *
+	 * Can't sleep on this, because skb_recv_datagram could be
+	 * waiting for a packet that is to be sent by the thread which
+	 * invoked the gc
+	 */
+	if (!down_write_trylock(&unix_gc_sem)) {
+		mutex_unlock(&unix_gc_local_lock);
+		return;
+	}
 	spin_lock(&unix_table_lock);
 
 	forall_unix_sockets(i, s)
@@ -207,8 +221,6 @@ void unix_gc(void)
 
 	forall_unix_sockets(i, s)
 	{
-		int open_count = 0;
-
 		/*
 		 *	If all instances of the descriptor are not
 		 *	in flight we are in use.
@@ -218,10 +230,20 @@ void unix_gc(void)
 		 *	In this case (see unix_create1()) we set artificial
 		 *	negative inflight counter to close race window.
 		 *	It is trick of course and dirty one.
+		 *
+		 *	Get the inflight counter first, then the open
+		 *	counter.  This avoids problems if racing with
+		 *	sendmsg
+		 *
+		 *	If just created socket is not yet attached to
+		 *	a file descriptor, assume open_count of 1
 		 */
+		int inflight_count = atomic_read(&unix_sk(s)->inflight);
+		int open_count = 1;
+
 		if (s->sk_socket && s->sk_socket->file)
 			open_count = file_count(s->sk_socket->file);
-		if (open_count > atomic_read(&unix_sk(s)->inflight))
+		if (open_count > inflight_count)
 			maybe_unmark_and_push(s);
 	}
 
@@ -302,11 +324,12 @@ void unix_gc(void)
 		u->gc_tree = GC_ORPHAN;
 	}
 	spin_unlock(&unix_table_lock);
+	up_write(&unix_gc_sem);
 
 	/*
 	 *	Here we are. Hitlist is filled. Die.
 	 */
 
 	__skb_queue_purge(&hitlist);
-	mutex_unlock(&unix_gc_sem);
+	mutex_unlock(&unix_gc_local_lock);
 }
Index: linux-2.6.22-rc2/include/net/af_unix.h
===================================================================
--- linux-2.6.22-rc2.orig/include/net/af_unix.h	2007-04-26 05:08:32.000000000 +0200
+++ linux-2.6.22-rc2/include/net/af_unix.h	2007-06-04 09:13:56.000000000 +0200
@@ -14,6 +14,7 @@ extern void unix_gc(void);
 
 extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 extern spinlock_t unix_table_lock;
+extern struct rw_semaphore unix_gc_sem;
 
 extern atomic_t unix_tot_inflight;
 
Index: linux-2.6.22-rc2/net/unix/af_unix.c
===================================================================
--- linux-2.6.22-rc2.orig/net/unix/af_unix.c	2007-06-03 23:58:11.000000000 +0200
+++ linux-2.6.22-rc2/net/unix/af_unix.c	2007-06-04 11:04:15.000000000 +0200
@@ -1572,6 +1572,7 @@ static int unix_dgram_recvmsg(struct kio
 
 	msg->msg_namelen = 0;
 
+	down_read(&unix_gc_sem);
 	mutex_lock(&u->readlock);
 
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
@@ -1629,6 +1630,7 @@ out_free:
 	skb_free_datagram(sk,skb);
 out_unlock:
 	mutex_unlock(&u->readlock);
+	up_read(&unix_gc_sem);
 out:
 	return err;
 }
@@ -1704,6 +1706,7 @@ static int unix_stream_recvmsg(struct ki
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
 
+	down_read(&unix_gc_sem);
 	mutex_lock(&u->readlock);
 
 	do
@@ -1732,6 +1735,7 @@ static int unix_stream_recvmsg(struct ki
 			if (!timeo)
 				break;
 			mutex_unlock(&u->readlock);
+			up_read(&unix_gc_sem);
 
 			timeo = unix_stream_data_wait(sk, timeo);
 
@@ -1739,6 +1743,7 @@ static int unix_stream_recvmsg(struct ki
 				err = sock_intr_errno(timeo);
 				goto out;
 			}
+			down_read(&unix_gc_sem);
 			mutex_lock(&u->readlock);
 			continue;
  unlock:
@@ -1810,6 +1815,7 @@ static int unix_stream_recvmsg(struct ki
 	} while (size);
 
 	mutex_unlock(&u->readlock);
+	up_read(&unix_gc_sem);
 	scm_recv(sock, msg, siocb->scm, flags);
 out:
 	return copied ? : err;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ