lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 10 Mar 2014 10:58:56 -0700
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	John Fastabend <john.fastabend@...il.com>
Cc:	xiyou.wangcong@...il.com, jhs@...atatu.com, netdev@...r.kernel.org,
	davem@...emloft.net
Subject: Re: [RCU PATCH 09/14] net: sched: make cls_u32 lockless

On Mon, 2014-03-10 at 10:07 -0700, John Fastabend wrote:
> Make cls_u32 classifier safe to run without holding lock. This patch
> converts statistics that are kept in read section u32_classify into
> per cpu counters.
> 
> This patch was tested with a tight u32 filter add/delete loop while
> generating traffic with pktgen. By running pktgen on vlan devices
> created on top of a physical device we can hit the qdisc layer
> correctly. For ingress qdisc's a loopback cable was used.
> 
> for i in {1..100}; do
>         q=`echo $i%8|bc`;
>         echo -n "u32 tos: iteration $i on queue $q";
>         tc filter add dev p3p2 parent $p prio $i u32 match ip tos 0x10 0xff \
>                   action skbedit queue_mapping $q;
>         sleep 1;
>         tc filter del dev p3p2 prio $i;
> 
>         echo -n "u32 tos hash table: iteration $i on queue $q";
>         tc filter add dev p3p2 parent $p protocol ip prio $i handle 628: u32 divisor 1
>         tc filter add dev p3p2 parent $p protocol ip prio $i u32 \
>                 match ip protocol 17 0xff link 628: offset at 0 mask 0xf00 shift 6 plus 0
>         tc filter add dev p3p2 parent $p protocol ip prio $i u32 \
>                 ht 628:0 match ip tos 0x10 0xff action skbedit queue_mapping $q
>         sleep 2;
>         tc filter del dev p3p2 prio $i
>         sleep 1;
> done
> 
> Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
> ---
>  net/sched/cls_u32.c |  257 +++++++++++++++++++++++++++++++++------------------
>  1 file changed, 168 insertions(+), 89 deletions(-)
> 
> diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
> index 84c28da..e1f7119 100644
> --- a/net/sched/cls_u32.c
> +++ b/net/sched/cls_u32.c
> @@ -36,6 +36,7 @@
>  #include <linux/kernel.h>
>  #include <linux/string.h>
>  #include <linux/errno.h>
> +#include <linux/percpu.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/skbuff.h>
>  #include <net/netlink.h>
> @@ -43,40 +44,46 @@
>  #include <net/pkt_cls.h>
>  
>  struct tc_u_knode {
> -	struct tc_u_knode	*next;
> +	struct tc_u_knode __rcu	*next;
>  	u32			handle;
> -	struct tc_u_hnode	*ht_up;
> +	struct tc_u_hnode __rcu	*ht_up;
>  	struct tcf_exts		exts;
>  #ifdef CONFIG_NET_CLS_IND
>  	int			ifindex;
>  #endif
>  	u8			fshift;
>  	struct tcf_result	res;
> -	struct tc_u_hnode	*ht_down;
> +	struct tc_u_hnode __rcu	*ht_down;
>  #ifdef CONFIG_CLS_U32_PERF
> -	struct tc_u32_pcnt	*pf;
> +	struct tc_u32_pcnt __percpu *pf;
>  #endif
>  #ifdef CONFIG_CLS_U32_MARK
> -	struct tc_u32_mark	mark;
> +	u32			val;
> +	u32			mask;
> +	u32 __percpu		*pcpu_success;
>  #endif
> +	struct tcf_proto	*tp;
> +	struct rcu_head		rcu;
>  	struct tc_u32_sel	sel;
>  };
>  
>  struct tc_u_hnode {
> -	struct tc_u_hnode	*next;
> +	struct tc_u_hnode __rcu	*next;
>  	u32			handle;
>  	u32			prio;
>  	struct tc_u_common	*tp_c;
>  	int			refcnt;
>  	unsigned int		divisor;
> -	struct tc_u_knode	*ht[1];
> +	struct tc_u_knode __rcu	*ht[1];
> +	struct rcu_head		rcu;
>  };
>  
>  struct tc_u_common {
> -	struct tc_u_hnode	*hlist;
> +	struct tc_u_hnode __rcu	*hlist;
>  	struct Qdisc		*q;
>  	int			refcnt;
>  	u32			hgenerator;
> +	struct rcu_head		rcu;
>  };
>  
>  static inline unsigned int u32_hash_fold(__be32 key,
> @@ -95,7 +102,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
>  		unsigned int	  off;
>  	} stack[TC_U32_MAXDEPTH];
>  
> -	struct tc_u_hnode *ht = tp->root;
> +	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
>  	unsigned int off = skb_network_offset(skb);
>  	struct tc_u_knode *n;
>  	int sdepth = 0;
> @@ -103,27 +110,31 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
>  	int sel = 0;
>  #ifdef CONFIG_CLS_U32_PERF
>  	int j;
> +	struct tc_u32_pcnt *pf;
>  #endif
>  	int i, r;
>  
>  next_ht:
> -	n = ht->ht[sel];
> +	n = rcu_dereference_bh(ht->ht[sel]);
>  
>  next_knode:
>  	if (n) {
>  		struct tc_u32_key *key = n->sel.keys;
>  
>  #ifdef CONFIG_CLS_U32_PERF
> -		n->pf->rcnt += 1;
> +		pf = this_cpu_ptr(n->pf);
> +		pf->rcnt += 1;

Please prefer this_cpu_add() or this_cpu_inc()

>  		j = 0;
>  #endif
>  
>  #ifdef CONFIG_CLS_U32_MARK
> -		if ((skb->mark & n->mark.mask) != n->mark.val) {
> -			n = n->next;
> +		if ((skb->mark & n->mask) != n->val) {
> +			n = rcu_dereference_bh(n->next);
>  			goto next_knode;
>  		} else {
> -			n->mark.success++;
> +			u32 *success = this_cpu_ptr(n->pcpu_success);
> +
> +			*success = *success + 1;

Same here, this_cpu_add() or this_cpu_inc()

>  		}
>  #endif
>  
> @@ -138,37 +149,41 @@ next_knode:
>  			if (!data)
>  				goto out;
>  			if ((*data ^ key->val) & key->mask) {
> -				n = n->next;
> +				n = rcu_dereference_bh(n->next);
>  				goto next_knode;
>  			}
>  #ifdef CONFIG_CLS_U32_PERF
> -			n->pf->kcnts[j] += 1;
> +			pf = this_cpu_ptr(n->pf);
> +			pf->kcnts[j] += 1;

same here.

>  			j++;
>  #endif
>  		}
> -		if (n->ht_down == NULL) {
> +
> +		ht = rcu_dereference_bh(n->ht_down);
> +		if (!ht) {
>  check_terminal:
>  			if (n->sel.flags & TC_U32_TERMINAL) {
>  
>  				*res = n->res;
>  #ifdef CONFIG_NET_CLS_IND
>  				if (!tcf_match_indev(skb, n->ifindex)) {
> -					n = n->next;
> +					n = rcu_dereference_bh(n->next);
>  					goto next_knode;
>  				}
>  #endif
>  #ifdef CONFIG_CLS_U32_PERF
> -				n->pf->rhit += 1;
> +				pf = this_cpu_ptr(n->pf);
> +				pf->rhit += 1;


same

Otherwise, patch looks good, thanks !


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ