[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1290165472.3034.109.camel@edumazet-laptop>
Date: Fri, 19 Nov 2010 12:17:52 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: David Miller <davem@...emloft.net>
Cc: Hagen Paul Pfeifer <hagen@...u.net>, netdev@...r.kernel.org,
Changli Gao <xiaosuo@...il.com>
Subject: [PATCH net-next-2.6] filter: optimize sk_run_filter
Le vendredi 19 novembre 2010 à 10:54 +0100, Eric Dumazet a écrit :
> I believe we should revert the u32 f_k = fentry->k; part
>
> fentry->k as is fast as f_k if stored on stack, and avoids one
> instruction if fentry->k is not needed.
>
>
A revert is not good on arches with decent number of registers (x86_64
for example).
Maybe have some CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS is needed, (or
already exist ?)
Here is the patch to save 400 bytes on x86_32, and really speedup the
damn thing on all arches.
Thanks
[PATCH net-next-2.6] filter: optimize sk_run_filter
remove pc variable to avoid arithmetic to compute fentry at each filter
instruction. Jumps directly manipulate fentry pointer.
As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps are before the last instruction, we dont need to check filter
bounds (number of instructions in filter array) at each iteration, so we
remove it from sk_run_filter() params.
On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
(filter: make sure filters dont read uninitialized memory)
Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
avoid too many ifdefs in this code.
This helps compiler to use cpu registers to hold fentry and A
accumulator.
On x86_32, this saves 401 bytes, and more important, sk_run_filter()
runs much faster because less register pressure (One less conditional
branch per BPF instruction)
# size net/core/filter.o net/core/filter_pre.o
text data bss dec hex filename
2948 0 0 2948 b84 net/core/filter.o
3349 0 0 3349 d15 net/core/filter_pre.o
on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
text data bss dec hex filename
5173 0 0 5173 1435 net/core/filter.o
5224 0 0 5224 1468 net/core/filter_pre.o
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
Cc: Changli Gao <xiaosuo@...il.com>
Cc: Hagen Paul Pfeifer <hagen@...u.net>
---
include/linux/filter.h | 2
net/core/filter.c | 93 +++++++++++++++++++-------------------
net/core/timestamping.c | 2
net/packet/af_packet.c | 2
4 files changed, 51 insertions(+), 48 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d7..447a775 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -147,7 +147,7 @@ struct sock;
extern int sk_filter(struct sock *sk, struct sk_buff *skb);
extern unsigned int sk_run_filter(struct sk_buff *skb,
- struct sock_filter *filter, int flen);
+ const struct sock_filter *filter);
extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
extern int sk_detach_filter(struct sock *sk);
extern int sk_chk_filter(struct sock_filter *filter, int flen);
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d..9e77b3c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
- * @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
{
void *ptr;
u32 A = 0; /* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
unsigned long memvalid = 0;
u32 tmp;
int k;
- int pc;
BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
- for (pc = 0; pc < flen; pc++) {
- const struct sock_filter *fentry = &filter[pc];
- u32 f_k = fentry->k;
+ for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define K (fentry->k)
+#else
+ const u32 K = fentry->k;
+#endif
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += f_k;
+ A += K;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= f_k;
+ A -= K;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= f_k;
+ A *= K;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= f_k;
+ A /= K;
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= f_k;
+ A &= K;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= f_k;
+ A |= K;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= f_k;
+ A <<= K;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= f_k;
+ A >>= K;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += f_k;
+ fentry += K;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > f_k) ? fentry->jt : fentry->jf;
+ fentry += (A > K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= f_k) ? fentry->jt : fentry->jf;
+ fentry += (A >= K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == f_k) ? fentry->jt : fentry->jf;
+ fentry += (A == K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & f_k) ? fentry->jt : fentry->jf;
+ fentry += (A & K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
- pc += (A > X) ? fentry->jt : fentry->jf;
+ fentry += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_X:
- pc += (A >= X) ? fentry->jt : fentry->jf;
+ fentry += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_X:
- pc += (A == X) ? fentry->jt : fentry->jf;
+ fentry += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_X:
- pc += (A & X) ? fentry->jt : fentry->jf;
+ fentry += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = f_k;
+ k = K;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = f_k;
+ k = K;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = f_k;
+ k = K;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + f_k;
+ k = X + K;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + f_k;
+ k = X + K;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + f_k;
+ k = X + K;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, f_k, 1, &tmp);
+ ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = f_k;
+ A = K;
continue;
case BPF_S_LDX_IMM:
- X = f_k;
+ X = K;
continue;
case BPF_S_LD_MEM:
- A = (memvalid & (1UL << f_k)) ?
- mem[f_k] : 0;
+ A = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_LDX_MEM:
- X = (memvalid & (1UL << f_k)) ?
- mem[f_k] : 0;
+ X = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -328,16 +331,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return f_k;
+ return K;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- memvalid |= 1UL << f_k;
- mem[f_k] = A;
+ memvalid |= 1UL << K;
+ mem[K] = A;
continue;
case BPF_S_STX:
- memvalid |= 1UL << f_k;
- mem[f_k] = X;
+ memvalid |= 1UL << K;
+ mem[K] = X;
continue;
default:
WARN_ON(1);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22..dac7ed6 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
if (likely(skb->dev &&
skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ return sk_run_filter(skb, ptp_filter);
else
return PTP_CLASS_NONE;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2096456..b6372dd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter != NULL)
- res = sk_run_filter(skb, filter->insns, filter->len);
+ res = sk_run_filter(skb, filter->insns);
rcu_read_unlock_bh();
return res;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists