[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <498330B2.4060004@cosmosbay.com>
Date: Fri, 30 Jan 2009 17:54:10 +0100
From: Eric Dumazet <dada1@...mosbay.com>
To: Andi Kleen <andi@...stfloor.org>
CC: Patrick McHardy <kaber@...sh.net>,
"David S. Miller" <davem@...emloft.net>,
Netfilter Developers <netfilter-devel@...r.kernel.org>,
Linux Network Development list <netdev@...r.kernel.org>
Subject: Re: [PATCH] netfilter: unfold two critical loops in ip_packet_match()
Andi Kleen a écrit :
> Eric Dumazet <dada1@...mosbay.com> writes:
>
>> While doing oprofile tests I noticed two loops are not properly unrolled by gcc
>
> That's because nobody passed -funroll-loops. Did you try that for
> that file? Likely will need -O2 too
I dont want to unroll all loops, only those two :)
I wish gcc (4.3.2 here) was litle bit smarter :(
Without using -funroll-loops
size ipv4/netfilter/ip_tables.o
text data bss dec hex filename
6424 368 16 6808 1a98 net/ipv4/netfilter/ip_tables.o
With -funroll-loops :
size ipv4/netfilter/ip_tables.o
text data bss dec hex filename
7144 368 16 7528 1d68 net/ipv4/netfilter/ip_tables.o
With my patch and no -funroll-loops
text data bss dec hex filename
6488 368 16 6872 1ad8 net/ipv4/netfilter/ip_tables.o
>
>> +static unsigned long ifname_compare(const void *_a, const void *_b, const void *_mask)
>> +{
>> + const unsigned long *a = (const unsigned long *)_a;
>> + const unsigned long *b = (const unsigned long *)_b;
>> + const unsigned long *mask = (const unsigned long *)_mask;
>> + unsigned long ret;
>> +
>> + ret = (a[0] ^ b[0]) & mask[0];
>> + ret |= (a[1] ^ b[1]) & mask[1];
>> + if (IFNAMSIZ > 2 * sizeof(unsigned long))
>> + ret |= (a[2] ^ b[2]) & mask[2];
>> + if (IFNAMSIZ > 3 * sizeof(unsigned long))
>> + ret |= (a[3] ^ b[3]) & mask[3];
>
> That will silently break for IFNAMSIZ >= 4*sizeof(unsigned long)
> You should add a dummy loop for that or at least a BUILD_BUG_ON
It will also break the day we port linux to a 128 bits machine :)
Thanks Andi
(By the way, I still use the patch on arch/x86/oprofile/op_model_ppro.c
to have a working oprofile on my dev machine...)
[PATCH] netfilter: unfold two critical loops in ip_packet_match()
While doing oprofile tests I noticed two loops are not properly unrolled by gcc
Using hand coded unrolled loop provides nice speedup : ipt_do_table
credited of 2.52 % of cpu instead of 3.29 % in tbench, for a small
text size increase (62 bytes for both loops)
Signed-off-by: Eric Dumazet <dada1@...mosbay.com>
---
net/ipv4/netfilter/ip_tables.c | 34 ++++++++++++++++++-------------
1 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef8b6ca..9298d0a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -65,6 +65,24 @@ do { \
#define inline
#endif
+static unsigned long ifname_compare(const void *_a, const void *_b, const void *_mask)
+{
+ const unsigned long *a = (const unsigned long *)_a;
+ const unsigned long *b = (const unsigned long *)_b;
+ const unsigned long *mask = (const unsigned long *)_mask;
+ unsigned long ret;
+
+ ret = (a[0] ^ b[0]) & mask[0];
+ if (IFNAMSIZ > sizeof(unsigned long))
+ ret |= (a[1] ^ b[1]) & mask[1];
+ if (IFNAMSIZ > 2 * sizeof(unsigned long))
+ ret |= (a[2] ^ b[2]) & mask[2];
+ if (IFNAMSIZ > 3 * sizeof(unsigned long))
+ ret |= (a[3] ^ b[3]) & mask[3];
+ BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
+ return ret;
+}
+
/*
We keep a set of rules for each CPU, so we can avoid write-locking
them in the softirq when updating the counters and therefore
@@ -83,7 +101,6 @@ ip_packet_match(const struct iphdr *ip,
const struct ipt_ip *ipinfo,
int isfrag)
{
- size_t i;
unsigned long ret;
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
@@ -103,13 +120,7 @@ ip_packet_match(const struct iphdr *ip,
return false;
}
- /* Look for ifname matches; this should unroll nicely. */
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- ret |= (((const unsigned long *)indev)[i]
- ^ ((const unsigned long *)ipinfo->iniface)[i])
- & ((const unsigned long *)ipinfo->iniface_mask)[i];
- }
-
+ ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask);
if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, ipinfo->iniface,
@@ -117,12 +128,7 @@ ip_packet_match(const struct iphdr *ip,
return false;
}
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- ret |= (((const unsigned long *)outdev)[i]
- ^ ((const unsigned long *)ipinfo->outiface)[i])
- & ((const unsigned long *)ipinfo->outiface_mask)[i];
- }
-
+ ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask);
if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, ipinfo->outiface,
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists