lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 29 Jan 2009 15:16:24 -0800
From:	Stephen Hemminger <shemminger@...tta.com>
To:	Eric Dumazet <dada1@...mosbay.com>
Cc:	David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH 5/5] netfilter: convert x_tables to use RCU

On Fri, 30 Jan 2009 00:04:16 +0100
Eric Dumazet <dada1@...mosbay.com> wrote:

> Stephen Hemminger a écrit :
> > Replace existing reader/writer lock with Read-Copy-Update to
> > eliminate the overhead of a read lock on each incoming packet.
> > This should reduce the overhead of iptables especially on SMP
> > systems.
> > 
> > The previous code used a reader-writer lock for two purposes.
> > The first was to ensure that the xt_table_info reference was not in
> > process of being changed. Since xt_table_info is only freed via one
> > routine, it was a direct conversion to RCU.
> > 
> > The other use of the reader-writer lock was to block changes
> > to counters while they were being read. This synchronization was
> > fixed by the previous patch.  But we still need to make sure table info
> > isn't going away.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@...tta.com>
> > 
> > 
> > ---
> >  include/linux/netfilter/x_tables.h |   10 ++++++-
> >  net/ipv4/netfilter/arp_tables.c    |   12 ++++-----
> >  net/ipv4/netfilter/ip_tables.c     |   12 ++++-----
> >  net/ipv6/netfilter/ip6_tables.c    |   12 ++++-----
> >  net/netfilter/x_tables.c           |   48 ++++++++++++++++++++++++++-----------
> >  5 files changed, 60 insertions(+), 34 deletions(-)
> > 
> > --- a/include/linux/netfilter/x_tables.h	2009-01-28 22:04:39.316517913 -0800
> > +++ b/include/linux/netfilter/x_tables.h	2009-01-28 22:14:54.648490491 -0800
> > @@ -352,8 +352,8 @@ struct xt_table
> >  	/* What hooks you will enter on */
> >  	unsigned int valid_hooks;
> >  
> > -	/* Lock for the curtain */
> > -	rwlock_t lock;
> > +	/* Lock for curtain */
> > +	spinlock_t lock;
> >  
> >  	/* Man behind the curtain... */
> >  	struct xt_table_info *private;
> > @@ -386,6 +386,12 @@ struct xt_table_info
> >  	/* Secret compartment */
> >  	seqcount_t *seq;
> >  
> > +	/* For the dustman... */
> > +	union {
> > +		struct rcu_head rcu;
> > +		struct work_struct work;
> > +	};
> > +
> >  	/* ipt_entry tables: one per CPU */
> >  	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
> >  	char *entries[1];
> > --- a/net/ipv4/netfilter/arp_tables.c	2009-01-28 22:13:16.423490077 -0800
> > +++ b/net/ipv4/netfilter/arp_tables.c	2009-01-28 22:14:54.648490491 -0800
> > @@ -238,8 +238,8 @@ unsigned int arpt_do_table(struct sk_buf
> >  	indev = in ? in->name : nulldevname;
> >  	outdev = out ? out->name : nulldevname;
> >  
> > -	read_lock_bh(&table->lock);
> > -	private = table->private;
> > +	rcu_read_lock_bh();
> > +	private = rcu_dereference(table->private);
> >  	table_base = (void *)private->entries[smp_processor_id()];
> >  	seq = per_cpu_ptr(private->seq, smp_processor_id());
> >  	e = get_entry(table_base, private->hook_entry[hook]);
> > @@ -315,7 +315,7 @@ unsigned int arpt_do_table(struct sk_buf
> >  			e = (void *)e + e->next_offset;
> >  		}
> >  	} while (!hotdrop);
> > -	read_unlock_bh(&table->lock);
> > +	rcu_read_unlock_bh();
> >  
> >  	if (hotdrop)
> >  		return NF_DROP;
> > @@ -1163,8 +1163,8 @@ static int do_add_counters(struct net *n
> >  		goto free;
> >  	}
> >  
> > -	write_lock_bh(&t->lock);
> > -	private = t->private;
> > +	rcu_read_lock_bh();
> > +	private = rcu_dereference(t->private);
> >  	if (private->number != num_counters) {
> >  		ret = -EINVAL;
> >  		goto unlock_up_free;
> > @@ -1179,7 +1179,7 @@ static int do_add_counters(struct net *n
> >  			   paddc,
> >  			   &i);
> >   unlock_up_free:
> > -	write_unlock_bh(&t->lock);
> > +	rcu_read_unlock_bh();
> >  	xt_table_unlock(t);
> >  	module_put(t->me);
> >   free:
> > --- a/net/ipv4/netfilter/ip_tables.c	2009-01-28 22:06:10.596739805 -0800
> > +++ b/net/ipv4/netfilter/ip_tables.c	2009-01-28 22:14:54.648490491 -0800
> > @@ -348,9 +348,9 @@ ipt_do_table(struct sk_buff *skb,
> >  	mtpar.family  = tgpar.family = NFPROTO_IPV4;
> >  	tgpar.hooknum = hook;
> >  
> > -	read_lock_bh(&table->lock);
> > +	rcu_read_lock_bh();
> >  	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
> > -	private = table->private;
> > +	private = rcu_dereference(table->private);
> >  	table_base = (void *)private->entries[smp_processor_id()];
> >  	seq = per_cpu_ptr(private->seq, smp_processor_id());
> >  	e = get_entry(table_base, private->hook_entry[hook]);
> > @@ -449,7 +449,7 @@ ipt_do_table(struct sk_buff *skb,
> >  		}
> >  	} while (!hotdrop);
> >  
> > -	read_unlock_bh(&table->lock);
> > +	rcu_read_unlock_bh();
> >  
> >  #ifdef DEBUG_ALLOW_ALL
> >  	return NF_ACCEPT;
> > @@ -1408,8 +1408,8 @@ do_add_counters(struct net *net, void __
> >  		goto free;
> >  	}
> >  
> > -	write_lock_bh(&t->lock);
> > -	private = t->private;
> > +	rcu_read_lock_bh();
> > +	private = rcu_dereference(t->private);
> 
> I feel a little bit nervous seeing a write_lock_bh() changed to an rcu_read_lock()

In fact, it is only updating entries on the current CPU.

> Also, add_counter_to_entry() is not using seqcount protection, so another thread
> doing an iptables -L in parallel with this thread will possibly get corrupted counters.
add_counter_to_entry() is local to the current CPU.


> (With write_lock_bh(), this corruption could not occur)
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ