Message-ID: <20100819152149.GA19744@Krystal>
Date:	Thu, 19 Aug 2010 11:21:49 -0400
From:	Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca>
To:	Changli Gao <xiaosuo@...il.com>
Cc:	Patrick McHardy <kaber@...sh.net>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <eric.dumazet@...il.com>,
	netfilter-devel@...r.kernel.org, netdev@...r.kernel.org
Subject: Re: [PATCH v2] netfilter: save the hash of the tuple in the
	original direction for later use

* Changli Gao (xiaosuo@...il.com) wrote:
> Since we don't change the tuple in the original direction, we can save its
> hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for later use by
> __nf_conntrack_confirm().
> 
> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
> to get the raw hash, and __hash_bucket() is used to get the bucket id.
> 
> In the SYN-flood case, early_drop() doesn't need to recompute the hash.
> 
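In other words, roughly (paraphrasing the diff below, not new code):

	/* resolve_normal_ct(): compute the raw 32-bit hash only once */
	__hash = ____hash_conntrack(&tuple, zone);
	h = __nf_conntrack_find_get(net, zone, &tuple, __hash);
	/* on a miss, __hash rides through init_conntrack() into
	 * __nf_conntrack_alloc(), which stashes it in the unused
	 * REPLY-direction hnnode.pprev; __nf_conntrack_confirm()
	 * then derives the bucket from the stashed value instead
	 * of rehashing the tuple: */
	hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);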
> Signed-off-by: Changli Gao <xiaosuo@...il.com>
> ---
> v2: use cmpxchg() to save 2 variables.
>  net/netfilter/nf_conntrack_core.c |  114 ++++++++++++++++++++++++++------------
>  1 file changed, 79 insertions(+), 35 deletions(-)
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index df3eedb..0e02205 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -65,14 +65,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
>  DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
>  EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
>  
> -static int nf_conntrack_hash_rnd_initted;
> -static unsigned int nf_conntrack_hash_rnd;
> -
> -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
> -				  u16 zone, unsigned int size, unsigned int rnd)
> +static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
>  {
>  	unsigned int n;
>  	u_int32_t h;
> +	static unsigned int rnd;

Why are you putting a hidden static variable here? It should probably
be declared outside of the function scope.
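For reference, file scope here would just mean keeping something like
the nf_conntrack_hash_rnd this patch removes above:

	/* file scope, shared by all the hash helpers */
	static unsigned int nf_conntrack_hash_rnd;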

> +
> +	if (unlikely(!rnd)) {
> +		unsigned int rand;
> +
> +		get_random_bytes(&rand, sizeof(rand));
> +		if (!rand)
> +			rand = 1;
> +		cmpxchg(&rnd, 0, rand);

This really belongs in an init() function, not in a dynamic check in
the __hash_conntrack() hot path. If you do that at init time, then you
don't even need the cmpxchg.
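Something along these lines, say (a rough sketch; I am assuming
nf_conntrack_init() or a similar existing setup hook would call it
once before any lookup can run):

	/* seed the hash secret exactly once at init time; nothing
	 * races with init, so no cmpxchg is needed */
	static void nf_conntrack_hash_rnd_init(void)
	{
		get_random_bytes(&nf_conntrack_hash_rnd,
				 sizeof(nf_conntrack_hash_rnd));
	}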

Or maybe I completely misunderstand your goal here; maybe you really
are trying to re-calculate the random bytes. But more explanation is
called for, because I really don't see where the value is ever brought
back to 0.
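If the intent is just one-time lazy seeding, the block above reads as
follows to me (comments mine):

	if (unlikely(!rnd)) {		/* taken only until first seeding */
		unsigned int rand;

		get_random_bytes(&rand, sizeof(rand));
		if (!rand)		/* 0 doubles as the "unseeded"   */
			rand = 1;	/* marker, so never store it     */
		cmpxchg(&rnd, 0, rand);	/* first caller to get here wins */
	}				/* the race; rnd never resets    */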

Thanks,

Mathieu

> +	}
>  
>  	/* The direction must be ignored, so we hash everything up to the
>  	 * destination ports (which is a multiple of 4) and treat the last
> @@ -83,14 +89,29 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
>  		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
>  				 tuple->dst.protonum));
>  
> -	return ((u64)h * size) >> 32;
> +	return h;
> +}
> +
> +static u32 __hash_bucket(u32 __hash, unsigned int size)
> +{
> +	return ((u64)__hash * size) >> 32;
> +}
> +
> +static u32 hash_bucket(u32 __hash, const struct net *net)
> +{
> +	return __hash_bucket(__hash, net->ct.htable_size);
> +}
> +
> +static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
> +				  u16 zone, unsigned int size)
> +{
> +	return __hash_bucket(____hash_conntrack(tuple, zone), size);
>  }
>  
>  static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
>  				       const struct nf_conntrack_tuple *tuple)
>  {
> -	return __hash_conntrack(tuple, zone, net->ct.htable_size,
> -				nf_conntrack_hash_rnd);
> +	return __hash_conntrack(tuple, zone, net->ct.htable_size);
>  }
>  
>  bool
> @@ -292,13 +313,13 @@ static void death_by_timeout(unsigned long ul_conntrack)
>   * OR
>   * - Caller must lock nf_conntrack_lock before calling this function
>   */
> -struct nf_conntrack_tuple_hash *
> -__nf_conntrack_find(struct net *net, u16 zone,
> -		    const struct nf_conntrack_tuple *tuple)
> +static struct nf_conntrack_tuple_hash *
> +____nf_conntrack_find(struct net *net, u16 zone,
> +		      const struct nf_conntrack_tuple *tuple, u32 __hash)
>  {
>  	struct nf_conntrack_tuple_hash *h;
>  	struct hlist_nulls_node *n;
> -	unsigned int hash = hash_conntrack(net, zone, tuple);
> +	unsigned int hash = hash_bucket(__hash, net);
>  
>  	/* Disable BHs the entire time since we normally need to disable them
>  	 * at least once for the stats anyway.
> @@ -327,19 +348,27 @@ begin:
>  
>  	return NULL;
>  }
> +
> +struct nf_conntrack_tuple_hash *
> +__nf_conntrack_find(struct net *net, u16 zone,
> +		    const struct nf_conntrack_tuple *tuple)
> +{
> +	return ____nf_conntrack_find(net, zone, tuple,
> +				     ____hash_conntrack(tuple, zone));
> +}
>  EXPORT_SYMBOL_GPL(__nf_conntrack_find);
>  
>  /* Find a connection corresponding to a tuple. */
> -struct nf_conntrack_tuple_hash *
> -nf_conntrack_find_get(struct net *net, u16 zone,
> -		      const struct nf_conntrack_tuple *tuple)
> +static struct nf_conntrack_tuple_hash *
> +__nf_conntrack_find_get(struct net *net, u16 zone,
> +			const struct nf_conntrack_tuple *tuple, u32 __hash)
>  {
>  	struct nf_conntrack_tuple_hash *h;
>  	struct nf_conn *ct;
>  
>  	rcu_read_lock();
>  begin:
> -	h = __nf_conntrack_find(net, zone, tuple);
> +	h = ____nf_conntrack_find(net, zone, tuple, __hash);
>  	if (h) {
>  		ct = nf_ct_tuplehash_to_ctrack(h);
>  		if (unlikely(nf_ct_is_dying(ct) ||
> @@ -357,6 +386,14 @@ begin:
>  
>  	return h;
>  }
> +
> +struct nf_conntrack_tuple_hash *
> +nf_conntrack_find_get(struct net *net, u16 zone,
> +		      const struct nf_conntrack_tuple *tuple)
> +{
> +	return __nf_conntrack_find_get(net, zone, tuple,
> +				       ____hash_conntrack(tuple, zone));
> +}
>  EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
>  
>  static void __nf_conntrack_hash_insert(struct nf_conn *ct,
> @@ -409,7 +446,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
>  		return NF_ACCEPT;
>  
>  	zone = nf_ct_zone(ct);
> -	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
> +	/* reuse the __hash saved before */
> +	hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
>  	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
>  
>  	/* We're not in hash table, and we refuse to set up related
> @@ -567,25 +605,20 @@ static noinline int early_drop(struct net *net, unsigned int hash)
>  	return dropped;
>  }
>  
> -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
> -				   const struct nf_conntrack_tuple *orig,
> -				   const struct nf_conntrack_tuple *repl,
> -				   gfp_t gfp)
> +static struct nf_conn *
> +__nf_conntrack_alloc(struct net *net, u16 zone,
> +		     const struct nf_conntrack_tuple *orig,
> +		     const struct nf_conntrack_tuple *repl,
> +		     gfp_t gfp, u32 __hash)
>  {
>  	struct nf_conn *ct;
>  
> -	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
> -		get_random_bytes(&nf_conntrack_hash_rnd,
> -				sizeof(nf_conntrack_hash_rnd));
> -		nf_conntrack_hash_rnd_initted = 1;
> -	}
> -
>  	/* We don't want any race condition at early drop stage */
>  	atomic_inc(&net->ct.count);
>  
>  	if (nf_conntrack_max &&
>  	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
> -		unsigned int hash = hash_conntrack(net, zone, orig);
> +		unsigned int hash = hash_bucket(__hash, net);
>  		if (!early_drop(net, hash)) {
>  			atomic_dec(&net->ct.count);
>  			if (net_ratelimit())
> @@ -616,7 +649,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
>  	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
>  	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
>  	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
> -	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
> +	/* save __hash for reusing when confirming */
> +	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = __hash;
>  	/* Don't set timer yet: wait for confirmation */
>  	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
>  	write_pnet(&ct->ct_net, net);
> @@ -643,6 +677,14 @@ out_free:
>  	return ERR_PTR(-ENOMEM);
>  #endif
>  }
> +
> +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
> +				   const struct nf_conntrack_tuple *orig,
> +				   const struct nf_conntrack_tuple *repl,
> +				   gfp_t gfp)
> +{
> +	return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
> +}
>  EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
>  
>  void nf_conntrack_free(struct nf_conn *ct)
> @@ -664,7 +706,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
>  	       struct nf_conntrack_l3proto *l3proto,
>  	       struct nf_conntrack_l4proto *l4proto,
>  	       struct sk_buff *skb,
> -	       unsigned int dataoff)
> +	       unsigned int dataoff, u32 __hash)
>  {
>  	struct nf_conn *ct;
>  	struct nf_conn_help *help;
> @@ -678,7 +720,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
>  		return NULL;
>  	}
>  
> -	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
> +	ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
> +				  __hash);
>  	if (IS_ERR(ct)) {
>  		pr_debug("Can't allocate conntrack.\n");
>  		return (struct nf_conntrack_tuple_hash *)ct;
> @@ -755,6 +798,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
>  	struct nf_conntrack_tuple_hash *h;
>  	struct nf_conn *ct;
>  	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
> +	u32 __hash;
>  
>  	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
>  			     dataoff, l3num, protonum, &tuple, l3proto,
> @@ -764,10 +808,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
>  	}
>  
>  	/* look for tuple match */
> -	h = nf_conntrack_find_get(net, zone, &tuple);
> +	__hash = ____hash_conntrack(&tuple, zone);
> +	h = __nf_conntrack_find_get(net, zone, &tuple, __hash);
>  	if (!h) {
>  		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
> -				   skb, dataoff);
> +				   skb, dataoff, __hash);
>  		if (!h)
>  			return NULL;
>  		if (IS_ERR(h))
> @@ -1307,8 +1352,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
>  			ct = nf_ct_tuplehash_to_ctrack(h);
>  			hlist_nulls_del_rcu(&h->hnnode);
>  			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
> -						  hashsize,
> -						  nf_conntrack_hash_rnd);
> +						  hashsize);
>  			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
>  		}
>  	}

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
