lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 21 Feb 2019 08:15:23 +0000
From:   Al Viro <viro@...iv.linux.org.uk>
To:     netdev@...r.kernel.org
Subject: [RFC] coallocating struct socket and struct socket_wq

	All instances of struct socket are embedded into some
bigger structure - most into struct socket_alloc (with struct inode
following struct socket), some into struct tun_file or struct
tap_queue.

	In the last two cases the corresponding struct socket_wq
(the one whose address goes into socket->wq) is in the same
containing structure, right after struct socket.

	In case of struct socket_alloc, we allocate struct
socket_wq separately and set socket->wq before anyone sees
either (in sock_alloc_inode()).  In sock_destroy_inode()
they are both freed (struct sock_alloc immediately,
struct socket_wq - RCU-delayed).

	AFAICS, nothing ever reassigns socket->wq.  Could we
simply embed struct socket_wq into struct socket?  RCU delay
is not an issue - net/socket.c is non-modular, so call_rcu()-based
variant freeing both together is not horrible.  sock_alloc_inode()
would be simplified, tun/tap uses would simply lose the separate
socket_wq members.

	The only problem I see here is ____cacheline_aligned_in_smp
we have on struct socket_wq.  Could we simply make it the first
field in struct socket?  Without lockdep they are reasonably small -
on amd64 socket_wq is 64 bytes, while the rest of struct socket is
48 (and pointer to wq would obviously disappear).

	Or is there something subtle I'm missing here?  What I
have in mind is something along the lines of

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index c0b52e48f0e6..7aedc748fbd0 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -519,8 +519,7 @@ static int tap_open(struct inode *inode, struct file *file)
 		goto err;
 	}
 
-	RCU_INIT_POINTER(q->sock.wq, &q->wq);
-	init_waitqueue_head(&q->wq.wait);
+	init_waitqueue_head(&q->sock.wq.wait);
 	q->sock.type = SOCK_RAW;
 	q->sock.state = SS_CONNECTED;
 	q->sock.file = file;
@@ -578,7 +577,7 @@ static __poll_t tap_poll(struct file *file, poll_table *wait)
 		goto out;
 
 	mask = 0;
-	poll_wait(file, &q->wq.wait, wait);
+	poll_wait(file, &q->sock.wq.wait, wait);
 
 	if (!ptr_ring_empty(&q->ring))
 		mask |= EPOLLIN | EPOLLRDNORM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed298c0cb39..51f023391998 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -169,7 +169,6 @@ struct tun_pcpu_stats {
 struct tun_file {
 	struct sock sk;
 	struct socket socket;
-	struct socket_wq wq;
 	struct tun_struct __rcu *tun;
 	struct fasync_struct *fasync;
 	/* only used for fasnyc */
@@ -2166,7 +2165,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 		goto out;
 	}
 
-	add_wait_queue(&tfile->wq.wait, &wait);
+	add_wait_queue(&tfile->socket.wq.wait, &wait);
 	current->state = TASK_INTERRUPTIBLE;
 
 	while (1) {
@@ -2186,7 +2185,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&tfile->wq.wait, &wait);
+	remove_wait_queue(&tfile->socket.wq.wait, &wait);
 
 out:
 	*err = error;
@@ -3409,8 +3408,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	tfile->flags = 0;
 	tfile->ifindex = 0;
 
-	init_waitqueue_head(&tfile->wq.wait);
-	RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
+	init_waitqueue_head(&tfile->socket.wq.wait);
 
 	tfile->socket.file = file;
 	tfile->socket.ops = &tun_socket_ops;
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 8e66866c11be..915a187cfabd 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -62,7 +62,6 @@ struct tap_dev {
 struct tap_queue {
 	struct sock sk;
 	struct socket sock;
-	struct socket_wq wq;
 	int vnet_hdr_sz;
 	struct tap_dev __rcu *tap;
 	struct file *file;
diff --git a/include/linux/net.h b/include/linux/net.h
index e0930678c8bf..e6e0d7858c78 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -108,13 +108,13 @@ struct socket_wq {
  *  @wq: wait queue for several uses
  */
 struct socket {
+	struct socket_wq	wq;
 	socket_state		state;
 
 	short			type;
 
 	unsigned long		flags;
 
-	struct socket_wq	*wq;
 
 	struct file		*file;
 	struct sock		*sk;
diff --git a/include/net/sock.h b/include/net/sock.h
index 6679f3c120b0..c05c08487900 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1805,7 +1805,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	WARN_ON(parent->sk);
 	write_lock_bh(&sk->sk_callback_lock);
-	rcu_assign_pointer(sk->sk_wq, parent->wq);
+	rcu_assign_pointer(sk->sk_wq, &parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
@@ -2089,7 +2089,7 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
 				  poll_table *p)
 {
 	if (!poll_does_not_wait(p)) {
-		poll_wait(filp, &sock->wq->wait, p);
+		poll_wait(filp, &sock->wq.wait, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
diff --git a/net/core/sock.c b/net/core/sock.c
index 71ded4d8025c..d097f981e7d8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2808,7 +2808,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_wq	=	sock->wq;
+		sk->sk_wq	=	&sock->wq;
 		sock->sk	=	sk;
 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
 	} else {
diff --git a/net/socket.c b/net/socket.c
index 643a1648fcc2..03cf4128b3ba 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -239,20 +239,13 @@ static struct kmem_cache *sock_inode_cachep __ro_after_init;
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
-	struct socket_wq *wq;
 
 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-	wq = kmalloc(sizeof(*wq), GFP_KERNEL);
-	if (!wq) {
-		kmem_cache_free(sock_inode_cachep, ei);
-		return NULL;
-	}
-	init_waitqueue_head(&wq->wait);
-	wq->fasync_list = NULL;
-	wq->flags = 0;
-	ei->socket.wq = wq;
+	init_waitqueue_head(&ei->socket.wq.wait);
+	ei->socket.wq.fasync_list = NULL;
+	ei->socket.wq.flags = 0;
 
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
@@ -263,15 +256,18 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void sock_destroy_inode(struct inode *inode)
+static void sock_destroy_inode_callback(struct rcu_head *head)
 {
-	struct socket_alloc *ei;
-
-	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	kfree_rcu(ei->socket.wq, rcu);
+	struct socket_alloc *ei = container_of(head, struct socket_alloc,
+						vfs_inode.i_rcu);
 	kmem_cache_free(sock_inode_cachep, ei);
 }
 
+static void sock_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, sock_destroy_inode_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct socket_alloc *ei = (struct socket_alloc *)foo;
@@ -583,7 +579,7 @@ static void __sock_release(struct socket *sock, struct inode *inode)
 		module_put(owner);
 	}
 
-	if (sock->wq->fasync_list)
+	if (sock->wq.fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
 	if (!sock->file) {
@@ -1183,7 +1179,7 @@ static int sock_fasync(int fd, struct file *filp, int on)
 		return -EINVAL;
 
 	lock_sock(sk);
-	wq = sock->wq;
+	wq = &sock->wq;
 	fasync_helper(fd, filp, on, &wq->fasync_list);
 
 	if (!wq->fasync_list)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ