Message-ID: <5f1bed11-146b-3924-415e-a1e3fc471da5@solarflare.com>
Date: Tue, 26 Jun 2018 19:22:27 +0100
From: Edward Cree <ecree@...arflare.com>
To: <linux-net-drivers@...arflare.com>, <netdev@...r.kernel.org>
CC: <davem@...emloft.net>
Subject: [RFC PATCH v2 net-next 12/12] net: listify jited Generic XDP
processing on x86_64

When JITing an eBPF program on x86_64, also JIT a list_func that calls the
bpf_func in a loop. Since this is a direct call, it should perform better
than indirect-calling bpf_func in a loop.

Since getting the percpu 'redirect_info' variable is ugly and magic, pass
a pointer into the list_func as a parameter instead of calculating it
inside the loop. This is safe because BPF execution is not preemptible,
so the percpu variable can't get moved while we're using it.
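
For reference, the list_func being JITed is semantically equivalent to
this C sketch (it mirrors the interpreter's DEFINE_BPF_LIST_PROG_RUN
in kernel/bpf/core.c below; bpf_func here stands for the already-JITed
program body being direct-called):

    static void list_func(struct list_head *list,
                          const struct bpf_insn *insn,
                          const struct redirect_info *percpu_ri)
    {
        struct bpf_work *work;

        list_for_each_entry(work, list, list) {
            work->ret = bpf_func(work->ctx, insn);
            work->ri = *percpu_ri;
        }
    }
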
Signed-off-by: Edward Cree <ecree@...arflare.com>
---
 arch/x86/net/bpf_jit_comp.c | 166 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/filter.h      |  19 +++--
 kernel/bpf/core.c           |  18 +++--
 net/core/dev.c              |   5 +-
 4 files changed, 197 insertions(+), 11 deletions(-)
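
As an aid to review, the hard-coded offsets emitted below assume the
following struct bpf_work layout on x86_64 (a sketch transcribed from
the layout comment in try_do_jit_list(); the struct itself is defined
earlier in this series):

    struct bpf_work {
        struct list_head list;       /* 0x00: next, 0x08: prev */
        void *ctx;                   /* 0x10 */
        struct redirect_info ri;     /* 0x18 */
        unsigned long ret;           /* 0x38 */
    };                               /* total size 0x40 */
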
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2580cd2e98b1..3e06dd79adda 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1060,6 +1060,171 @@ struct x64_jit_data {
struct jit_context ctx;
};
+static void try_do_jit_list(struct bpf_prog *bpf_prog,
+ unsigned int (*bpf_func)(const void *ctx,
+ const struct bpf_insn *insn))
+{
+ /* list_func takes three arguments:
+ * struct list_head *list [RDI]
+ * const struct bpf_insn *insn [RSI]
+ * const struct redirect_info *percpu_ri [RDX]
+ *
+ * Layout of struct bpf_work on x86_64:
+ * struct list_head {
+ * struct list_head *next; // 0x0
+ * struct list_head *prev; // 0x8
+ * } list; // 0x0
+ * void *ctx; // 0x10
+ * struct redirect_info {
+ * u32 ifindex; // 0x18
+ * u32 flags; // 0x1c
+ * struct bpf_map *map; // 0x20
+ * struct bpf_map *map_to_flush; // 0x28
+ * unsigned long map_owner; // 0x30
+ * } ri; // 0x18
+ * unsigned long ret; // 0x38
+ * (total size 0x40 bytes)
+ *
+ * Desired function body:
+ * struct redirect_info *ri = percpu_ri; [R12]
+ * struct bpf_work *work; [RBP]
+ *
+ * list_for_each_entry(work, list, list) {
+ * work->ret = (*bpf_func)(work->ctx, insn);
+ * work->ri = *ri;
+ * }
+ *
+ * Assembly to emit:
+ * ; save CSRs
+ * push %rbx
+ * push %rbp
+ * push %r12
+ * ; stash pointer to redirect_info
+ * mov %rdx,%r12 ; ri = percpu_ri
+ * ; start list
+ * mov %rdi,%rbp ; head = list
+ * next: ; while (true) {
+ * mov (%rbp),%rbx ; rbx = head->next
+ * cmp %rbx,%rdi ; if (rbx == list)
+ * je out ; break
+ * mov %rbx,%rbp ; head = rbx
+ * push %rdi ; save list
+ * push %rsi ; save insn (is still arg2)
+ * ; struct bpf_work *work = head (container_of, but list is first member)
+ * mov 0x10(%rbp),%rdi; arg1 = work->ctx
+ * callq bpf_func ; rax = (*bpf_func)(work->ctx, insn)
+ * mov %rax,0x38(%rbp); work->ret = rax
+ * ; work->ri = *ri
+ * mov (%r12),%rdx
+ * mov %rdx,0x18(%rbp)
+ * mov 0x8(%r12),%rdx
+ * mov %rdx,0x20(%rbp)
+ * mov 0x10(%r12),%rdx
+ * mov %rdx,0x28(%rbp)
+ * mov 0x18(%r12),%rdx
+ * mov %rdx,0x30(%rbp)
+ * pop %rsi ; restore insn
+ * pop %rdi ; restore list
+ * jmp next ; }
+ * out:
+ * ; restore CSRs and return
+ * pop %r12
+ * pop %rbp
+ * pop %rbx
+ * retq
+ */
+ u8 *image, *prog, *from_to_out, *next;
+ struct bpf_binary_header *header;
+ int off, cnt = 0;
+ s64 jmp_offset;
+
+ /* Prog should be 81 bytes; let's round up to 128 */
+ header = bpf_jit_binary_alloc(128, &image, 1, jit_fill_hole);
+ if (!header)
+ return;
+ prog = image;
+
+ /* push rbx */
+ EMIT1(0x53);
+ /* push rbp */
+ EMIT1(0x55);
+ /* push %r12 */
+ EMIT2(0x41, 0x54);
+ /* mov %rdx,%r12 */
+ EMIT3(0x49, 0x89, 0xd4);
+ /* mov %rdi,%rbp */
+ EMIT3(0x48, 0x89, 0xfd);
+ next = prog;
+ /* mov 0x0(%rbp),%rbx */
+ EMIT4(0x48, 0x8b, 0x5d, 0x00);
+ /* cmp %rbx,%rdi */
+ EMIT3(0x48, 0x39, 0xdf);
+ /* je out */
+ EMIT2(X86_JE, 0);
+ from_to_out = prog; /* record . to patch this jump later */
+ /* mov %rbx,%rbp */
+ EMIT3(0x48, 0x89, 0xdd);
+ /* push %rdi */
+ EMIT1(0x57);
+ /* push %rsi */
+ EMIT1(0x56);
+ /* mov 0x10(%rbp),%rdi */
+ EMIT4(0x48, 0x8b, 0x7d, 0x10);
+ /* e8 callq bpf_func */
+ jmp_offset = (u8 *)bpf_func - (prog + 5);
+ if (!is_simm32(jmp_offset)) {
+ pr_err("call out of range to BPF func %p from list image %p\n",
+ bpf_func, image);
+ goto fail;
+ }
+ EMIT1_off32(0xE8, jmp_offset);
+ /* mov %rax,0x38(%rbp) */
+ EMIT4(0x48, 0x89, 0x45, 0x38);
+ /* mov (%r12),%rdx */
+ EMIT4(0x49, 0x8b, 0x14, 0x24);
+ /* mov %rdx,0x18(%rbp) */
+ EMIT4(0x48, 0x89, 0x55, 0x18);
+ for (off = 0x8; off < 0x20; off += 0x8) {
+ /* mov off(%r12),%rdx */
+ EMIT4(0x49, 0x8b, 0x54, 0x24);
+ EMIT1(off);
+ /* mov %rdx,0x18+off(%rbp) */
+ EMIT4(0x48, 0x89, 0x55, 0x18 + off);
+ }
+ /* pop %rsi */
+ EMIT1(0x5e);
+ /* pop %rdi */
+ EMIT1(0x5f);
+ /* jmp next */
+ jmp_offset = next - (prog + 2);
+ if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+ goto fail;
+ EMIT2(0xeb, jmp_offset);
+ /* out: */
+ jmp_offset = prog - from_to_out;
+ if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+ goto fail;
+ from_to_out[-1] = jmp_offset;
+ /* pop %r12 */
+ EMIT2(0x41, 0x5c);
+ /* pop %rbp */
+ EMIT1(0x5d);
+ /* pop %rbx */
+ EMIT1(0x5b);
+ /* retq */
+ EMIT1(0xc3);
+ /* If we were wrong about how much space we needed, scream and shout */
+ WARN_ON(cnt != 81);
+ if (bpf_jit_enable > 1)
+ bpf_jit_dump(0, cnt, 0, image);
+ bpf_jit_binary_lock_ro(header);
+ bpf_prog->list_func = (void *)image;
+ bpf_prog->jited_list = 1;
+ return;
+fail:
+ bpf_jit_binary_free(header);
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
@@ -1176,6 +1339,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog->bpf_func = (void *)image;
prog->jited = 1;
prog->jited_len = proglen;
+ try_do_jit_list(prog, prog->bpf_func);
} else {
prog = orig_prog;
}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d813034e286..ad1e75bf0991 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,7 +517,8 @@ struct bpf_prog {
const struct bpf_insn *insn);
/* Takes a list of struct bpf_work */
void (*list_func)(struct list_head *list,
- const struct bpf_insn *insn);
+ const struct bpf_insn *insn,
+ const struct redirect_info *percpu_ri);
/* Instructions for interpreter */
union {
struct sock_filter insns[0];
@@ -532,7 +533,7 @@ struct sk_filter {
};
#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
-#define BPF_LIST_PROG_RUN(filter, list) (*(filter)->list_func)(list, (filter)->insnsi)
+#define BPF_LIST_PROG_RUN(filter, list, percpu) (*(filter)->list_func)(list, (filter)->insnsi, percpu)
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
@@ -638,10 +639,11 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
}
static __always_inline void bpf_list_prog_run_xdp(const struct bpf_prog *prog,
- struct list_head *list)
+ struct list_head *list,
+ const struct redirect_info *percpu_ri)
{
/* Caller must hold rcu_read_lock(), as per bpf_prog_run_xdp(). */
- BPF_LIST_PROG_RUN(prog, list);
+ BPF_LIST_PROG_RUN(prog, list, percpu_ri);
}
static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
@@ -756,6 +758,15 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
return (void *)addr;
}
+static inline struct bpf_binary_header *
+bpf_list_jit_binary_hdr(const struct bpf_prog *fp)
+{
+ unsigned long real_start = (unsigned long)fp->list_func;
+ unsigned long addr = real_start & PAGE_MASK;
+
+ return (void *)addr;
+}
+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
{
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c35da826cc3b..028be88c4af8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -621,15 +621,22 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
*/
void __weak bpf_jit_free(struct bpf_prog *fp)
{
- if (fp->jited) {
- struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+ struct bpf_binary_header *hdr;
+ if (fp->jited) {
+ hdr = bpf_jit_binary_hdr(fp);
bpf_jit_binary_unlock_ro(hdr);
bpf_jit_binary_free(hdr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
+ if (fp->jited_list) {
+ hdr = bpf_list_jit_binary_hdr(fp);
+ bpf_jit_binary_unlock_ro(hdr);
+ bpf_jit_binary_free(hdr);
+ }
+
bpf_prog_unlock_free(fp);
}
@@ -1358,13 +1365,13 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
#define LIST_PROG_NAME(stack_size) __bpf_list_prog_run##stack_size
#define DEFINE_BPF_LIST_PROG_RUN(stack_size) \
-static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn) \
+static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn, const struct redirect_info *percpu_ri) \
{ \
struct bpf_work *work; \
\
list_for_each_entry(work, list, list) { \
work->ret = PROG_NAME(stack_size)(work->ctx, insn); \
- work->ri = *this_cpu_ptr(&redirect_info); \
+ work->ri = *percpu_ri; \
} \
}
@@ -1398,7 +1405,8 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
#undef PROG_NAME_LIST
#define PROG_NAME_LIST(stack_size) LIST_PROG_NAME(stack_size),
static void (*list_interpreters[])(struct list_head *list,
- const struct bpf_insn *insn) = {
+ const struct bpf_insn *insn,
+ const struct redirect_info *percpu_ri) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
diff --git a/net/core/dev.c b/net/core/dev.c
index 746112c22afd..7c1879045ef8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4214,6 +4214,7 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
struct sk_buff_head *list,
struct sk_buff_head *pass_list)
{
+ const struct redirect_info *percpu_ri = this_cpu_ptr(&redirect_info);
struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
struct bpf_work *bw;
struct sk_buff *skb;
@@ -4249,11 +4250,11 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
if (xdp_prog->list_func && (xdp_prog->jited_list ||
!xdp_prog->jited))
- bpf_list_prog_run_xdp(xdp_prog, &xdp_list);
+ bpf_list_prog_run_xdp(xdp_prog, &xdp_list, percpu_ri);
else
list_for_each_entry(bw, &xdp_list, list) {
bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
- bw->ri = *this_cpu_ptr(&redirect_info);
+ bw->ri = *percpu_ri;
}
for (i = 0; i < n; i++) {