lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 10 Dec 2017 07:12:05 -0800
From:   Tonghao Zhang <xiangxia.m.yue@...il.com>
To:     davem@...emloft.net, xiyou.wangcong@...il.com, edumazet@...gle.com,
        willemb@...gle.com, xemul@...nvz.org
Cc:     netdev@...r.kernel.org, Tonghao Zhang <xiangxia.m.yue@...il.com>
Subject: [PATCH v6 2/3] sock: Move the socket inuse to namespace.

In some case, we want to know how many sockets are in use in
different _net_ namespaces. It's a key resource metric.

This patch adds a member in struct netns_core. This is a counter
for socket-inuse in the _net_ namespace. The patch will add/sub
counter in the sk_alloc, sk_clone_lock and __sk_free.

The main reasons for doing this are that:

1. When linux calls the 'do_exit' for processes to exit, the functions
'exit_task_namespaces' and 'exit_task_work' will be called sequentially.
'exit_task_namespaces' may have destroyed the _net_ namespace, but
'sock_release' called in 'exit_task_work' may use the _net_ namespace
if we counter the socket-inuse in sock_release.

2. socket and sock are in pair. More important, sock holds the _net_
namespace. We counter the socket-inuse in sock, for avoiding holding
_net_ namespace again in socket. It's a easy way to maintain the code.

3. We alloc the sock_inuse in net_alloc() and free it in net_free()
because we should make sure that the sock_inuse will not be used anymore
after we release it. Notice that some sockets (e.g netlink socket created
in kernel) will be released after all of the network namespace exit methods.
For more details, see the cleanup_net. Then, we should not use the per
network namespace operations to malloc the sock_inuse.

Signed-off-by: Martin Zhang <zhangjunweimartin@...ichuxing.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@...il.com>
---
 include/net/netns/core.h |  3 +++
 include/net/sock.h       |  1 +
 net/core/net_namespace.c | 10 ++++++++++
 net/core/sock.c          | 26 ++++++++++++++++++++++++++
 net/socket.c             | 21 ++-------------------
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 45cfb5d..a5e8a66 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -11,6 +11,9 @@ struct netns_core {
 
 	int	sysctl_somaxconn;
 
+#ifdef CONFIG_PROC_FS
+	int __percpu *sock_inuse;
+#endif
 	struct prot_inuse __percpu *prot_inuse;
 };
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 9155da4..44f4890 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1262,6 +1262,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk)
 /* Called with local bh disabled */
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
 int sock_prot_inuse_get(struct net *net, struct proto *proto);
+int sock_inuse_get(struct net *net);
 #else
 static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
 		int inc)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832..6c191fb 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -363,6 +363,13 @@ static struct net *net_alloc(void)
 	if (!net)
 		goto out_free;
 
+#ifdef CONFIG_PROC_FS
+	net->core.sock_inuse = alloc_percpu(int);
+	if (!net->core.sock_inuse) {
+		kmem_cache_free(net_cachep, net);
+		goto out_free;
+	}
+#endif
 	rcu_assign_pointer(net->gen, ng);
 out:
 	return net;
@@ -374,6 +381,9 @@ static struct net *net_alloc(void)
 
 static void net_free(struct net *net)
 {
+#ifdef CONFIG_PROC_FS
+	free_percpu(net->core.sock_inuse);
+#endif
 	kfree(rcu_access_pointer(net->gen));
 	kmem_cache_free(net_cachep, net);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index c2dd2d3..f6974eb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,8 @@
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
+static void sock_inuse_add(struct net *net, int val);
+
 /**
  * sk_ns_capable - General socket capability test
  * @sk: Socket to use a capability on or through
@@ -1534,6 +1536,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		if (likely(sk->sk_net_refcnt))
 			get_net(net);
 		sock_net_set(sk, net);
+		sock_inuse_add(net, 1);
 		refcount_set(&sk->sk_wmem_alloc, 1);
 
 		mem_cgroup_sk_alloc(sk);
@@ -1595,6 +1598,8 @@ void sk_destruct(struct sock *sk)
 
 static void __sk_free(struct sock *sk)
 {
+	sock_inuse_add(sock_net(sk), -1);
+
 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
 		sock_diag_broadcast_destroy(sk);
 	else
@@ -1716,6 +1721,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
 		atomic64_set(&newsk->sk_cookie, 0);
+		sock_inuse_add(sock_net(newsk), 1);
 
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -3061,6 +3067,22 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
 
+static void sock_inuse_add(struct net *net, int val)
+{
+	this_cpu_add(*net->core.sock_inuse, val);
+}
+
+int sock_inuse_get(struct net *net)
+{
+	int cpu, res = 0;
+
+	for_each_possible_cpu(cpu)
+		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+
+	return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_inuse_get);
+
 static int __net_init sock_inuse_init_net(struct net *net)
 {
 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
@@ -3112,6 +3134,10 @@ static inline void assign_proto_idx(struct proto *prot)
 static inline void release_proto_idx(struct proto *prot)
 {
 }
+
+static void sock_inuse_add(struct net *net, int val)
+{
+}
 #endif
 
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
diff --git a/net/socket.c b/net/socket.c
index 05f361f..bbd2e9c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -163,12 +163,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 
 /*
- *	Statistics counters of the socket lists
- */
-
-static DEFINE_PER_CPU(int, sockets_in_use);
-
-/*
  * Support routines.
  * Move socket addresses back and forth across the kernel/user
  * divide and look after the messy bits.
@@ -578,7 +572,6 @@ struct socket *sock_alloc(void)
 	inode->i_gid = current_fsgid();
 	inode->i_op = &sockfs_inode_ops;
 
-	this_cpu_add(sockets_in_use, 1);
 	return sock;
 }
 EXPORT_SYMBOL(sock_alloc);
@@ -605,7 +598,6 @@ void sock_release(struct socket *sock)
 	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
-	this_cpu_sub(sockets_in_use, 1);
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
@@ -2622,17 +2614,8 @@ static int __init sock_init(void)
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
-	int cpu;
-	int counter = 0;
-
-	for_each_possible_cpu(cpu)
-	    counter += per_cpu(sockets_in_use, cpu);
-
-	/* It can be negative, by the way. 8) */
-	if (counter < 0)
-		counter = 0;
-
-	seq_printf(seq, "sockets: used %d\n", counter);
+	seq_printf(seq, "sockets: used %d\n",
+		   sock_inuse_get(seq->private));
 }
 #endif				/* CONFIG_PROC_FS */
 
-- 
1.8.3.1

Powered by blists - more mailing lists