[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1292518436.2883.393.camel@edumazet-laptop>
Date: Thu, 16 Dec 2010 17:53:56 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: Jesper Dangaard Brouer <hawk@...x.dk>
Cc: Patrick McHardy <kaber@...sh.net>,
Arnaldo Carvalho de Melo <acme@...radead.org>,
Steven Rostedt <srostedt@...hat.com>,
Alexander Duyck <alexander.h.duyck@...el.com>,
Stephen Hemminger <shemminger@...tta.com>,
netfilter-devel <netfilter-devel@...r.kernel.org>,
netdev <netdev@...r.kernel.org>,
Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@...el.com>
Subject: [PATCH v2 net-next-2.6] netfilter: ip_tables: dont block BH while
reading counters
Le jeudi 16 décembre 2010 à 17:07 +0100, Eric Dumazet a écrit :
> Here is a tested version : no need for a (buggy in previous patch)
> memset() if we use vzalloc()
>
> Note : We miss a this_cpu_write_seqcount_begin() interface.
> I'll bug lkml to get it asap.
Well, we have a faster solution:
Add a seqcount to "struct xt_info_lock",
so that we do the increment pair once per table, not once per rule.
We already have the seq address there, so there is no need for a
this_cpu_write_seqcount_begin() interface.
[PATCH v2 net-next-2.6] netfilter: ip_tables: dont block BH while reading counters
Using "iptables -L" with a lot of rules can cause excessive BH latency.
Jesper mentioned ~6 ms and was worried about frame drops.
Switch to a per-cpu seqcount scheme, so that taking a snapshot of the
counters doesn't need to block BH (neither on this cpu nor on other cpus).
Reported-by: Jesper Dangaard Brouer <hawk@...x.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
include/linux/netfilter/x_tables.h | 9 ++++-
net/ipv4/netfilter/ip_tables.c | 45 ++++++++-------------------
2 files changed, 21 insertions(+), 33 deletions(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 742bec0..7027762 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -473,6 +473,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
*/
struct xt_info_lock {
spinlock_t lock;
+ seqcount_t seq;
unsigned char readers;
};
DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
@@ -496,16 +497,20 @@ static inline void xt_info_rdlock_bh(void)
local_bh_disable();
lock = &__get_cpu_var(xt_info_locks);
- if (likely(!lock->readers++))
+ if (likely(!lock->readers++)) {
spin_lock(&lock->lock);
+ write_seqcount_begin(&lock->seq);
+ }
}
static inline void xt_info_rdunlock_bh(void)
{
struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
- if (likely(!--lock->readers))
+ if (likely(!--lock->readers)) {
+ write_seqcount_end(&lock->seq);
spin_unlock(&lock->lock);
+ }
local_bh_enable();
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d63..7fe3d7c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
struct ipt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU.
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqcount_t *seq = &per_cpu(xt_info_locks, cpu).seq;
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(seq);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(seq, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc(countersize);
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ipt_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists