lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <498898BF.7060906@cosmosbay.com>
Date:	Tue, 03 Feb 2009 20:19:27 +0100
From:	Eric Dumazet <dada1@...mosbay.com>
To:	Stephen Hemminger <shemminger@...tta.com>
CC:	David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH 3/3] iptables: lock free counters (alternate version)

Eric Dumazet a écrit :
> Stephen Hemminger a écrit :
>> This is an alternative to earlier RCU/seqcount_t version of counters.
>> The counters operate as usual without locking, but when counters are rotated
>> around the CPU's entries.  RCU is used in two ways, first to handle the
>> counter rotation, second for replace.
>>
> 
> Is it a working patch or just a prototype ?
> 
>> Signed-off-by: Stephen Hemminger <shemminger@...tta.com>
>>
>> ---
>>  include/linux/netfilter/x_tables.h |   10 +++-
>>  net/ipv4/netfilter/arp_tables.c    |   73 +++++++++++++++++++++++++++---------
>>  net/ipv4/netfilter/ip_tables.c     |   68 ++++++++++++++++++++++++---------
>>  net/ipv6/netfilter/ip6_tables.c    |   75 ++++++++++++++++++++++++++-----------
>>  net/netfilter/x_tables.c           |   43 +++++++++++++++------
>>  5 files changed, 197 insertions(+), 72 deletions(-)
>>
>> --- a/include/linux/netfilter/x_tables.h	2009-02-02 15:06:39.893751845 -0800
>> +++ b/include/linux/netfilter/x_tables.h	2009-02-02 15:28:10.022574005 -0800
>> @@ -353,7 +353,7 @@ struct xt_table
>>  	unsigned int valid_hooks;
>>  
>>  	/* Lock for the curtain */
>> -	rwlock_t lock;
>> +	struct mutex lock;
>>  
>>  	/* Man behind the curtain... */
>>  	struct xt_table_info *private;
>> @@ -383,9 +383,15 @@ struct xt_table_info
>>  	unsigned int hook_entry[NF_INET_NUMHOOKS];
>>  	unsigned int underflow[NF_INET_NUMHOOKS];
>>  
>> +	/* For the dustman... */
>> +	union {
>> +		struct rcu_head rcu;
>> +		struct work_struct work;
>> +	};
>> +
>>  	/* ipt_entry tables: one per CPU */
>>  	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
>> -	char *entries[1];
>> +	void *entries[1];
>>  };
>>  
>>  #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
>> --- a/net/ipv4/netfilter/ip_tables.c	2009-02-02 15:06:29.684249364 -0800
>> +++ b/net/ipv4/netfilter/ip_tables.c	2009-02-02 15:14:13.256499021 -0800
>> @@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb,
>>  	mtpar.family  = tgpar.family = NFPROTO_IPV4;
>>  	tgpar.hooknum = hook;
>>  
>> -	read_lock_bh(&table->lock);
>>  	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
>> -	private = table->private;
>> -	table_base = (void *)private->entries[smp_processor_id()];
>> +
>> +	rcu_read_lock_bh();
>> +	private = rcu_dereference(table->private);
>> +	table_base = rcu_dereference(private->entries[smp_processor_id()]);
>> +
>>  	e = get_entry(table_base, private->hook_entry[hook]);
>>  
>>  	/* For return from builtin chain */
>> @@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb,
>>  		}
>>  	} while (!hotdrop);
>>  
>> -	read_unlock_bh(&table->lock);
>> +	rcu_read_unlock_bh();
>>  
>>  #ifdef DEBUG_ALLOW_ALL
>>  	return NF_ACCEPT;
>> @@ -892,45 +894,73 @@ set_entry_to_counter(const struct ipt_en
>>  	return 0;
>>  }
>>  
>> +static inline int
>> +set_counter_to_entry(struct ipt_entry *e,
>> +		     const struct ipt_counters total[],
>> +		     unsigned int *i)
>> +{
>> +	SET_COUNTER(e->counters, total[*i].bcnt, total[*i].pcnt);
>> +
>> +	(*i)++;
>> +	return 0;
>> +}
>> +
>> +
>>  static void
>> -get_counters(const struct xt_table_info *t,
>> +get_counters(struct xt_table_info *t,
>>  	     struct xt_counters counters[])
>>  {
>>  	unsigned int cpu;
>>  	unsigned int i;
>>  	unsigned int curcpu;
>> +	struct ipt_entry *e;
>>  
>> -	/* Instead of clearing (by a previous call to memset())
>> -	 * the counters and using adds, we set the counters
>> -	 * with data used by 'current' CPU
>> -	 * We dont care about preemption here.
>> -	 */
>> +	preempt_disable();
>>  	curcpu = raw_smp_processor_id();
>> -
>> +	e = t->entries[curcpu];
>>  	i = 0;
>> -	IPT_ENTRY_ITERATE(t->entries[curcpu],
>> +	IPT_ENTRY_ITERATE(e,
>>  			  t->size,
>>  			  set_entry_to_counter,
> 
> Hum, current cpu might be interrupted by NIC, since you only disabled preemption.
> set_entry_to_counter() might get garbage.
> I suppose I already mentioned it :)
> 
>>  			  counters,
>>  			  &i);
>>  
>>  	for_each_possible_cpu(cpu) {
>> +		void *p;
>> +
>>  		if (cpu == curcpu)
>>  			continue;
>> +
>> +		/* Swizzle the values and wait */
>> +		e->counters = ((struct xt_counters) { 0, 0 });
> 
> I dont see what you want to do here...
> 
> e->counters is the counter associated with rule #0
> 
>> +		p = t->entries[cpu];
>> +		rcu_assign_pointer(t->entries[cpu], e);
>> +		synchronize_net();
> 
> 
> Oh well, not this synchronize_net() :)
> 
> This wont provide atomic sampling of counters for whole CPUS, and introduce large delays
> on big machines (NR_CPUS >= 64)
> 
> What problem do we want to solve here ?
> 
> 
> Current iptables is able to do an atomic snapshot because of the rwlock.
> 
> If we really want to keep this feature (but get rid of rwlock), we might do the reverse
> with two seqlocks + RCU
> 
> One seqlock (seqlock_counters) to protect counter updates
> One seqlock (seqlock_rules) to protect rules changes
> 
> Ie :
> 
> ipt_do_table() doing :
> {
> rcu_read_lock 
> read_seqbegin(&table->seqlock_rules);
> rcu fetch priv table pointer and work on it
>  do {
>  for all counters updates, use 
>  do {
>    seq = read_seqbegin(table->seqlock_counters);

Hum... reading my mail, this one can be done once only, at the beginning

Why then I suggested two different seqlocks, I am wondering :)

I need to think a litle bit more, we might do something to allow several "iptables -L" in //

maybe an atomic_t to protect counters would be ok :


Ie :

One atomic_t (counters_readers) to protect counter updates
One seqlock (seqlock_rules) to protect rules changes



ipt_do_table() doing :
{
begin:
read_seqbegin(&table->seqlock_rules); // it could loop but with BH enabled
rcu_read_lock_bh();
while (atomic_read(&table->counters_readers) > 0) {
   cpu_relax();
   rcu_read_unlock_bh();
   goto begin;
}
private = rcu_dereference(table->private);
...
 do {
 for all counters updates, use 
 do {
   update counters
   }
 } while (!hotdrop);
rcu_read_unlock_bh()
}


for get_counter() (iptables -L)
atomic_inc(&table->counters_readers);
waiting one RCU grace period, 
{
get / sum all counters (no updates of counters are allowed)
}
atomic_dec(&table->counters_readers)



for iptables_update/replace (iptables -A|I|D|R|Z|F...)
writer doing a write_seqlock(&table->seqlock_rules), waiting one RCU grace period, 
{
change rules/counters
}
write_sequnlock();





--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ