Message-ID: <4982A60F.8020005@cosmosbay.com>
Date: Fri, 30 Jan 2009 08:02:39 +0100
From: Eric Dumazet <dada1@...mosbay.com>
To: Stephen Hemminger <shemminger@...tta.com>
CC: David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH 5/5] netfilter: convert x_tables to use RCU
Eric Dumazet wrote:
> Stephen Hemminger wrote:
>> On Fri, 30 Jan 2009 00:04:16 +0100
>> Eric Dumazet <dada1@...mosbay.com> wrote:
>>
>>> Stephen Hemminger wrote:
>>>> Replace the existing reader/writer lock with Read-Copy-Update to
>>>> eliminate the overhead of a read lock on each incoming packet.
>>>> This should reduce the overhead of iptables especially on SMP
>>>> systems.
>>>>
>>>> The previous code used a reader-writer lock for two purposes.
>>>> The first was to ensure that the xt_table_info reference was not in
>>>> the process of being changed. Since xt_table_info is only freed via one
>>>> routine, it was a direct conversion to RCU.
>>>>
>>>> The other use of the reader-writer lock was to block changes
>>>> to counters while they were being read. This synchronization was
>>>> fixed by the previous patch. But we still need to make sure the
>>>> table info isn't going away.
>>>>
>>>> Signed-off-by: Stephen Hemminger <shemminger@...tta.com>
>>>>
>>>>
>>>> ---
>>>> include/linux/netfilter/x_tables.h | 10 ++++++-
>>>> net/ipv4/netfilter/arp_tables.c | 12 ++++-----
>>>> net/ipv4/netfilter/ip_tables.c | 12 ++++-----
>>>> net/ipv6/netfilter/ip6_tables.c | 12 ++++-----
>>>> net/netfilter/x_tables.c | 48 ++++++++++++++++++++++++++-----------
>>>> 5 files changed, 60 insertions(+), 34 deletions(-)
>>>>
>>>> --- a/include/linux/netfilter/x_tables.h 2009-01-28 22:04:39.316517913 -0800
>>>> +++ b/include/linux/netfilter/x_tables.h 2009-01-28 22:14:54.648490491 -0800
>>>> @@ -352,8 +352,8 @@ struct xt_table
>>>> /* What hooks you will enter on */
>>>> unsigned int valid_hooks;
>>>>
>>>> - /* Lock for the curtain */
>>>> - rwlock_t lock;
>>>> + /* Lock for curtain */
>>>> + spinlock_t lock;
>>>>
>>>> /* Man behind the curtain... */
>>>> struct xt_table_info *private;
>>>> @@ -386,6 +386,12 @@ struct xt_table_info
>>>> /* Secret compartment */
>>>> seqcount_t *seq;
>>>>
>>>> + /* For the dustman... */
>>>> + union {
>>>> + struct rcu_head rcu;
>>>> + struct work_struct work;
>>>> + };
>>>> +
>>>> /* ipt_entry tables: one per CPU */
>>>> /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
>>>> char *entries[1];
>>>> --- a/net/ipv4/netfilter/arp_tables.c 2009-01-28 22:13:16.423490077 -0800
>>>> +++ b/net/ipv4/netfilter/arp_tables.c 2009-01-28 22:14:54.648490491 -0800
>>>> @@ -238,8 +238,8 @@ unsigned int arpt_do_table(struct sk_buf
>>>> indev = in ? in->name : nulldevname;
>>>> outdev = out ? out->name : nulldevname;
>>>>
>>>> - read_lock_bh(&table->lock);
>>>> - private = table->private;
>>>> + rcu_read_lock_bh();
>>>> + private = rcu_dereference(table->private);
>>>> table_base = (void *)private->entries[smp_processor_id()];
>>>> seq = per_cpu_ptr(private->seq, smp_processor_id());
>>>> e = get_entry(table_base, private->hook_entry[hook]);
>>>> @@ -315,7 +315,7 @@ unsigned int arpt_do_table(struct sk_buf
>>>> e = (void *)e + e->next_offset;
>>>> }
>>>> } while (!hotdrop);
>>>> - read_unlock_bh(&table->lock);
>>>> + rcu_read_unlock_bh();
>>>>
>>>> if (hotdrop)
>>>> return NF_DROP;
>>>> @@ -1163,8 +1163,8 @@ static int do_add_counters(struct net *n
>>>> goto free;
>>>> }
>>>>
>>>> - write_lock_bh(&t->lock);
>>>> - private = t->private;
>>>> + rcu_read_lock_bh();
>>>> + private = rcu_dereference(t->private);
>>>> if (private->number != num_counters) {
>>>> ret = -EINVAL;
>>>> goto unlock_up_free;
>>>> @@ -1179,7 +1179,7 @@ static int do_add_counters(struct net *n
>>>> paddc,
>>>> &i);
>>>> unlock_up_free:
>>>> - write_unlock_bh(&t->lock);
>>>> + rcu_read_unlock_bh();
>>>> xt_table_unlock(t);
>>>> module_put(t->me);
>>>> free:
>>>> --- a/net/ipv4/netfilter/ip_tables.c 2009-01-28 22:06:10.596739805 -0800
>>>> +++ b/net/ipv4/netfilter/ip_tables.c 2009-01-28 22:14:54.648490491 -0800
>>>> @@ -348,9 +348,9 @@ ipt_do_table(struct sk_buff *skb,
>>>> mtpar.family = tgpar.family = NFPROTO_IPV4;
>>>> tgpar.hooknum = hook;
>>>>
>>>> - read_lock_bh(&table->lock);
>>>> + rcu_read_lock_bh();
>>>> IP_NF_ASSERT(table->valid_hooks & (1 << hook));
>>>> - private = table->private;
>>>> + private = rcu_dereference(table->private);
>>>> table_base = (void *)private->entries[smp_processor_id()];
>>>> seq = per_cpu_ptr(private->seq, smp_processor_id());
>>>> e = get_entry(table_base, private->hook_entry[hook]);
>>>> @@ -449,7 +449,7 @@ ipt_do_table(struct sk_buff *skb,
>>>> }
>>>> } while (!hotdrop);
>>>>
>>>> - read_unlock_bh(&table->lock);
>>>> + rcu_read_unlock_bh();
>>>>
>>>> #ifdef DEBUG_ALLOW_ALL
>>>> return NF_ACCEPT;
>>>> @@ -1408,8 +1408,8 @@ do_add_counters(struct net *net, void __
>>>> goto free;
>>>> }
>>>>
>>>> - write_lock_bh(&t->lock);
>>>> - private = t->private;
>>>> + rcu_read_lock_bh();
>>>> + private = rcu_dereference(t->private);
>>> I feel a little bit nervous seeing a write_lock_bh() changed to an rcu_read_lock()
>> Fact is, it is only updating entries on the current cpu
>
> Yes, like done in ipt_do_table() ;)
>
> Fact is, we need to tell other threads, running on other cpus, that an update
> of our entries is running.
>
> Let me check if your v4 and xt_counters abstraction already solved this problem.
Hum, I just checked and indeed there is a problem...
#define SUM_COUNTER(s,c) do { (s).bcnt += (c).bcnt; (s).pcnt += (c).pcnt; } while(0)
needs to be changed to use
#define SUM_COUNTER(s, c) do { xt_incr_counter(s, (c).bcnt, (c).pcnt); } while (0)
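
To illustrate what I mean, here is a rough sketch. I don't have your v4 in
front of me, so the helper names and signatures below (xt_add_counter /
xt_sum_counter) are only guesses made up for this mail, not your actual
xt_incr_counter : the idea is that the do_add_counters() writer publishes
its update through the per-cpu seqcount, and the get_counters() reader
folds a cpu's counters into the sum only once it gets a stable snapshot.

/*
 * Sketch only : assumes the per-cpu seqcount_t added earlier in this
 * series is reachable as private->seq, as in the hunks quoted above.
 */
#include <linux/seqlock.h>
#include <linux/percpu.h>
#include <linux/netfilter/x_tables.h>

/*
 * Writer side (do_add_counters() path, runs with BH disabled under
 * rcu_read_lock_bh()) : update this cpu's entry inside a seqcount
 * write section so readers on other cpus can detect the update.
 */
static void xt_add_counter(struct xt_table_info *private,
			   struct xt_counters *cnt, u64 bcnt, u64 pcnt)
{
	seqcount_t *seq = per_cpu_ptr(private->seq, smp_processor_id());

	write_seqcount_begin(seq);
	cnt->bcnt += bcnt;
	cnt->pcnt += pcnt;
	write_seqcount_end(seq);
}

/*
 * Reader side (get_counters() path) : snapshot one entry's counters
 * from @cpu, retrying if a writer was active, then fold the stable
 * values into the sum.  SUM_COUNTER() would call something like this
 * instead of touching bcnt/pcnt directly.
 */
static void xt_sum_counter(struct xt_table_info *private, unsigned int cpu,
			   const struct xt_counters *cnt,
			   struct xt_counters *sum)
{
	seqcount_t *seq = per_cpu_ptr(private->seq, cpu);
	unsigned int start;
	u64 bcnt, pcnt;

	do {
		start = read_seqcount_begin(seq);
		bcnt = cnt->bcnt;
		pcnt = cnt->pcnt;
	} while (read_seqcount_retry(seq, start));

	sum->bcnt += bcnt;
	sum->pcnt += pcnt;
}

The important point is that every writer, including the do_add_counters()
path above, goes through the same seqcount as the packet path; otherwise a
reader summing the 64bit counters (on 32bit hosts in particular) can still
observe a torn value.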
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html