[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1292511761.2883.236.camel@edumazet-laptop>
Date: Thu, 16 Dec 2010 16:02:41 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: Jesper Dangaard Brouer <hawk@...x.dk>
Cc: Arnaldo Carvalho de Melo <acme@...radead.org>,
Steven Rostedt <srostedt@...hat.com>,
Alexander Duyck <alexander.h.duyck@...el.com>,
Stephen Hemminger <shemminger@...tta.com>,
netfilter-devel <netfilter-devel@...r.kernel.org>,
netdev <netdev@...r.kernel.org>,
Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@...el.com>
Subject: Re: Possible regression: Packet drops during iptables calls
Le jeudi 16 décembre 2010 à 15:29 +0100, Eric Dumazet a écrit :
> Le jeudi 16 décembre 2010 à 15:24 +0100, Jesper Dangaard Brouer a
> écrit :
>
> > In my case I think this will not help. I'll kill the cache anyways, as
> > the ruleset is 19MB and my CPU cache is 8MB.
> >
> >
>
> Yep ;)
>
> By the way, you speak of a 'possible regression', but we always masked
> BH while doing get_counters().
>
> Only very recent kernels are masking them for each unit (cpu) of work.
>
> There was an attempt to use a lockless read for each counter (using a
> seqlock), but it was not completed. I guess we could do something to
> resurrect this idea.
>
>
Something like the following patch:
net/ipv4/netfilter/ip_tables.c | 51 +++++++++++++------------------
1 files changed, 22 insertions(+), 29 deletions(-)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d63..ed54f80 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -293,6 +293,8 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
return (void *)entry + entry->next_offset;
}
+static DEFINE_PER_CPU(seqcount_t, counters_seq);
+
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff *skb,
@@ -311,6 +313,7 @@ ipt_do_table(struct sk_buff *skb,
unsigned int *stackptr, origptr, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
+ seqcount_t *seq;
/* Initialization */
ip = ip_hdr(skb);
@@ -364,7 +367,11 @@ ipt_do_table(struct sk_buff *skb,
goto no_match;
}
+ seq = &__get_cpu_var(counters_seq);
+ /* could be faster if we had this_cpu_write_seqcount_begin() */
+ write_seqcount_begin(seq);
ADD_COUNTER(e->counters, skb->len, 1);
+ write_seqcount_end(seq);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -877,6 +884,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
+
static void
get_counters(const struct xt_table_info *t,
struct xt_counters counters[])
@@ -884,42 +892,27 @@ get_counters(const struct xt_table_info *t,
struct ipt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU.
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
+
+ memset(counters, 0, sizeof(struct xt_counters) * t->size);
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqcount_t *seq = &per_cpu(counters_seq, cpu);
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(seq);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(seq, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists