Message-ID: <20180228111956.3c6cab27@xeon-e3>
Date:   Wed, 28 Feb 2018 11:19:56 -0800
From:   Stephen Hemminger <stephen@...workplumber.org>
To:     Tonghao Zhang <xiangxia.m.yue@...il.com>
Cc:     netdev@...r.kernel.org
Subject: Re: [PATCH] tcp: Support tcp socket allocated counter in namespace.

On Mon, 12 Feb 2018 18:44:00 -0800
Tonghao Zhang <xiangxia.m.yue@...il.com> wrote:

> Sometimes we want to know how many tcp sockets are in use in
> different _net_ namespaces. It's a key resource metric. With
> this patch, we can get it via /proc/net/sockstat.
> 
> The 'alloc' value shown in /proc/net/sockstat is the total
> number of tcp sockets in the kernel. This patch makes the
> count per-namespace by adding a new counter, because the
> existing counter is also used for proto (e.g. tcp, udp and
> sctp) memory management.
> 
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@...il.com>
> ---
>  include/net/netns/ipv4.h |  3 +++
>  include/net/tcp.h        |  2 ++
>  net/ipv4/proc.c          |  2 +-
>  net/ipv4/tcp.c           |  2 ++
>  net/ipv4/tcp_ipv4.c      | 34 ++++++++++++++++++++++++++++++++++
>  net/ipv4/tcp_minisocks.c |  3 +++
>  6 files changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 44668c2..85e91bd 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -68,6 +68,9 @@ struct netns_ipv4 {
>  
>  	struct inet_peer_base	*peers;
>  	struct sock  * __percpu	*tcp_sk;
> +#ifdef CONFIG_PROC_FS
> +	int __percpu *tcp_sock_allocated;
> +#endif
>  	struct netns_frags	frags;
>  #ifdef CONFIG_NETFILTER
>  	struct xt_table		*iptable_filter;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 093e967..4b24b6e 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1780,6 +1780,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
>  int tcp_gro_complete(struct sk_buff *skb);
>  
>  void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
> +void tcp_sock_allocated_add(struct net *net, int val);
> +int tcp_sock_allocated_get(struct net *net);
>  
>  static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
>  {
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index dc5edc8..8a147f7 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -58,7 +58,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
>  	int orphans, sockets;
>  
>  	orphans = percpu_counter_sum_positive(&tcp_orphan_count);
> -	sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
> +	sockets = tcp_sock_allocated_get(net);
>  
>  	socket_seq_show(seq);
>  	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 874c931..77fe4a5 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -453,6 +453,8 @@ void tcp_init_sock(struct sock *sk)
>  	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
>  
>  	sk_sockets_allocated_inc(sk);
> +	if (likely(sk->sk_net_refcnt))
> +		tcp_sock_allocated_add(sock_net(sk), 1);
>  }
>  EXPORT_SYMBOL(tcp_init_sock);
>  
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 95738aa..a7bd0c4 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1928,6 +1928,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
>  	tcp_saved_syn_free(tp);
>  
>  	sk_sockets_allocated_dec(sk);
> +	if (likely(sk->sk_net_refcnt))
> +		tcp_sock_allocated_add(sock_net(sk), -1);
>  }
>  EXPORT_SYMBOL(tcp_v4_destroy_sock);
>  
> @@ -2446,6 +2448,28 @@ struct proto tcp_prot = {
>  };
>  EXPORT_SYMBOL(tcp_prot);
>  
> +void tcp_sock_allocated_add(struct net *net, int val)
> +{
> +#ifdef CONFIG_PROC_FS
> +	this_cpu_add(*net->ipv4.tcp_sock_allocated, val);
> +#endif
> +}
> +EXPORT_SYMBOL(tcp_sock_allocated_add);
> +
> +int tcp_sock_allocated_get(struct net *net)
> +{
> +#ifdef CONFIG_PROC_FS
> +	int cpu, res = 0;
> +
> +	for_each_possible_cpu(cpu)
> +		res += *per_cpu_ptr(net->ipv4.tcp_sock_allocated, cpu);
> +	return res;
> +#else
> +	return 0;
> +#endif
> +}
> +EXPORT_SYMBOL(tcp_sock_allocated_get);
> +
>  static void __net_exit tcp_sk_exit(struct net *net)
>  {
>  	int cpu;
> @@ -2455,6 +2479,10 @@ static void __net_exit tcp_sk_exit(struct net *net)
>  	for_each_possible_cpu(cpu)
>  		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
>  	free_percpu(net->ipv4.tcp_sk);
> +
> +#ifdef CONFIG_PROC_FS
> +	free_percpu(net->ipv4.tcp_sock_allocated);
> +#endif
>  }
>  
>  static int __net_init tcp_sk_init(struct net *net)
> @@ -2465,6 +2493,12 @@ static int __net_init tcp_sk_init(struct net *net)
>  	if (!net->ipv4.tcp_sk)
>  		return -ENOMEM;
>  
> +#ifdef CONFIG_PROC_FS
> +	net->ipv4.tcp_sock_allocated = alloc_percpu(int);
> +	if (!net->ipv4.tcp_sock_allocated)
> +		goto fail;
> +#endif
> +
>  	for_each_possible_cpu(cpu) {
>  		struct sock *sk;
>  
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index a8384b0c..573fe43 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -559,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
>  		newtp->rack.reo_wnd_persist = 0;
>  		newtp->rack.dsack_seen = 0;
>  
> +		if (likely(newsk->sk_net_refcnt))
> +			tcp_sock_allocated_add(sock_net(newsk), 1);
> +
>  		__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
>  	}
>  	return newsk;
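
For reference, not part of the patch: with the sockstat_seq_show() change
above, the per-namespace count becomes visible as the "alloc" field of
/proc/net/sockstat inside each network namespace. A minimal userspace
reader, assuming a kernel that carries this patch, could look like:

#include <stdio.h>

/*
 * Illustrative sketch only: prints the "alloc" value that
 * sockstat_seq_show() emits for the current network namespace.
 */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/net/sockstat", "r");

	if (!f) {
		perror("/proc/net/sockstat");
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		int inuse, orphan, tw, alloc;
		long mem;

		/* Same format string as the seq_printf() in sockstat_seq_show() */
		if (sscanf(line, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld",
			   &inuse, &orphan, &tw, &alloc, &mem) == 5) {
			printf("tcp sockets allocated in this netns: %d\n", alloc);
			break;
		}
	}
	fclose(f);
	return 0;
}

Run it under "ip netns exec <name>" (or after setns()) to read the value
for another namespace.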


This makes sense to me only if you get rid of the old tcp_sockets_allocated and
the proto field entry for .sockets_allocated.  In that case you are replacing a global
counter with a per-namespace counter and the impact should be minimal.
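
Roughly, that would mean removals along these lines (hand-written,
untested sketch; the @@ markers are only for orientation, and the
tcp_sockets_allocated definition in net/ipv4/tcp.c plus the tcpv6_prot
reference would have to go with it):

--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ void tcp_init_sock(struct sock *sk)
-	sk_sockets_allocated_inc(sk);
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ void tcp_v4_destroy_sock(struct sock *sk)
-	sk_sockets_allocated_dec(sk);
@@ struct proto tcp_prot = {
-	.sockets_allocated	= &tcp_sockets_allocated,

The catch the commit message already hints at: sk_sockets_allocated_read_positive()
feeds the generic socket memory-accounting path in net/core/sock.c, so that
user would have to be converted (or the old counter kept for it) before
tcp_sockets_allocated can really go away.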
