Message-ID: <5f1bed11-146b-3924-415e-a1e3fc471da5@solarflare.com>
Date:   Tue, 26 Jun 2018 19:22:27 +0100
From:   Edward Cree <ecree@...arflare.com>
To:     <linux-net-drivers@...arflare.com>, <netdev@...r.kernel.org>
CC:     <davem@...emloft.net>
Subject: [RFC PATCH v2 net-next 12/12] net: listify jited Generic XDP
 processing on x86_64

When JITing an eBPF program on x86_64, also JIT a list_func that calls the
 bpf_func in a loop.  Since this is a direct call, it should perform better
 than indirect-calling bpf_func in a loop.
Since getting the percpu 'redirect_info' variable is ugly and magic, pass
 a pointer to it into the list_func as a parameter instead of calculating it
 inside the loop.  This is safe because BPF execution is not preemptible,
 so the percpu variable can't get moved while we're using it.
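
In C terms, the emitted list_func is roughly equivalent to the sketch
 below, which mirrors the interpreter fallback and the comment in
 try_do_jit_list (bpf_func here stands for the program's jited entry
 point, i.e. the function pointer passed to try_do_jit_list):

static void list_func(struct list_head *list, const struct bpf_insn *insn,
		      const struct redirect_info *percpu_ri)
{
	struct bpf_work *work;

	list_for_each_entry(work, list, list) {
		work->ret = (*bpf_func)(work->ctx, insn); /* direct call when jited */
		work->ri = *percpu_ri;
	}
}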

Signed-off-by: Edward Cree <ecree@...arflare.com>
---
 arch/x86/net/bpf_jit_comp.c | 164 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/filter.h      |  19 +++--
 kernel/bpf/core.c           |  18 +++--
 net/core/dev.c              |   5 +-
 4 files changed, 195 insertions(+), 11 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2580cd2e98b1..3e06dd79adda 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1060,6 +1060,169 @@ struct x64_jit_data {
 	struct jit_context ctx;
 };
 
+static void try_do_jit_list(struct bpf_prog *bpf_prog,
+			    unsigned int (*bpf_func)(const void *ctx,
+						     const struct bpf_insn *insn))
+{
+	/* list_func takes three arguments:
+	 * struct list_head *list [RDI]
+	 * const struct bpf_insn *insn [RSI]
+	 * const struct redirect_info *percpu_ri [RDX]
+	 *
+	 * Layout of struct bpf_work on x86_64:
+	 * struct list_head {
+	 *	struct list_head *next; // 0x0
+	 *	struct list_head *prev; // 0x8
+	 * } list; // 0x0
+	 * void *ctx; 0x10
+	 * struct redirect_info {
+	 *	u32 ifindex; // 0x18
+	 *	u32 flags; // 0x1c
+	 *	struct bpf_map *map; // 0x20
+	 *	struct bpf_map *map_to_flush; // 0x28
+	 *	unsigned long   map_owner; // 0x30
+	 * } ri; // 0x18
+	 * unsigned long ret; // 0x38
+	 * (total size 0x40 bytes)
+	 *
+	 * Desired function body:
+	 * struct redirect_info *ri = percpu_ri; [R12]
+	 * struct bpf_work *work; [RBP]
+	 *
+	 * list_for_each_entry(work, list, list) {
+	 *	work->ret = (*bpf_func)(work->ctx, insn);
+	 *	work->ri = *ri;
+	 * }
+	 *
+	 * Assembly to emit:
+	 * ; save CSRs
+	 *	push %rbx
+	 *	push %rbp
+	 *	push %r12
+	 * ; stash pointer to redirect_info
+	 *	mov %rdx,%r12	; ri = percpu_ri
+	 * ; start list
+	 *	mov %rdi,%rbp	; head = list
+	 * next:		; while (true) {
+	 *	mov (%rbp),%rbx	; rbx = head->next
+	 *	cmp %rbx,%rdi	; if (rbx == list)
+	 *	je out		;	break
+	 *	mov %rbx,%rbp	; head = rbx
+	 *	push %rdi	; save list
+	 *	push %rsi	; save insn (is still arg2)
+	 * ; struct bpf_work *work = head (container_of, but list is first member)
+	 *	mov 0x10(%rbp),%rdi; arg1 = work->ctx
+	 *	callq bpf_func  ; rax = (*bpf_func)(work->ctx, insn)
+	 *	mov %rax,0x38(%rbp); work->ret = rax
+	 * ; work->ri = *ri
+	 *	mov (%r12),%rdx
+	 *	mov %rdx,0x18(%rbp)
+	 *	mov 0x8(%r12),%rdx
+	 *	mov %rdx,0x20(%rbp)
+	 *	mov 0x10(%r12),%rdx
+	 *	mov %rdx,0x28(%rbp)
+	 *	mov 0x18(%r12),%rdx
+	 *	mov %rdx,0x30(%rbp)
+	 *	pop %rsi	; restore insn
+	 *	pop %rdi	; restore list
+	 *	jmp next	; }
+	 * out:
+	 * ; restore CSRs and return
+	 *	pop %r12
+	 *	pop %rbp
+	 *	pop %rbx
+	 *	retq
+	 */
+	u8 *image, *prog, *from_to_out, *next;
+	struct bpf_binary_header *header;
+	int off, cnt = 0;
+	s64 jmp_offset;
+
+	/* Prog should be 81 bytes; let's round up to 128 */
+	header = bpf_jit_binary_alloc(128, &image, 1, jit_fill_hole);
+	prog = image;
+
+	/* push rbx */
+	EMIT1(0x53);
+	/* push rbp */
+	EMIT1(0x55);
+	/* push %r12 */
+	EMIT2(0x41, 0x54);
+	/* mov %rdx,%r12 */
+	EMIT3(0x49, 0x89, 0xd4);
+	/* mov %rdi,%rbp */
+	EMIT3(0x48, 0x89, 0xfd);
+	next = prog;
+	/* mov 0x0(%rbp),%rbx */
+	EMIT4(0x48, 0x8b, 0x5d, 0x00);
+	/* cmp %rbx,%rdi */
+	EMIT3(0x48, 0x39, 0xdf);
+	/* je out */
+	EMIT2(X86_JE, 0);
+	from_to_out = prog; /* record . to patch this jump later */
+	/* mov %rbx,%rbp */
+	EMIT3(0x48, 0x89, 0xdd);
+	/* push %rdi */
+	EMIT1(0x57);
+	/* push %rsi */
+	EMIT1(0x56);
+	/* mov 0x10(%rbp),%rdi */
+	EMIT4(0x48, 0x8b, 0x7d, 0x10);
+	/* e8 callq bpf_func */
+	jmp_offset = (u8 *)bpf_func - (prog + 5);
+	if (!is_simm32(jmp_offset)) {
+		pr_err("call out of range to BPF func %p from list image %p\n",
+		       bpf_func, image);
+		goto fail;
+	}
+	EMIT1_off32(0xE8, jmp_offset);
+	/* mov %rax,0x38(%rbp) */
+	EMIT4(0x48, 0x89, 0x45, 0x38);
+	/* mov (%r12),%rdx */
+	EMIT4(0x49, 0x8b, 0x14, 0x24);
+	/* mov %rdx,0x18(%rbp) */
+	EMIT4(0x48, 0x89, 0x55, 0x18);
+	for (off = 0x8; off < 0x20; off += 0x8) {
+		/* mov off(%r12),%rdx */
+		EMIT4(0x49, 0x8b, 0x54, 0x24);
+		EMIT1(off);
+		/* mov %rdx,0x18+off(%rbp) */
+		EMIT4(0x48, 0x89, 0x55, 0x18 + off);
+	}
+	/* pop %rsi */
+	EMIT1(0x5e);
+	/* pop %rdi */
+	EMIT1(0x5f);
+	/* jmp next */
+	jmp_offset = next - (prog + 2);
+	if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+		goto fail;
+	EMIT2(0xeb, jmp_offset);
+	/* out: */
+	jmp_offset = prog - from_to_out;
+	if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+		goto fail;
+	from_to_out[-1] = jmp_offset;
+	/* pop %r12 */
+	EMIT2(0x41, 0x5c);
+	/* pop %rbp */
+	EMIT1(0x5d);
+	/* pop %rbx */
+	EMIT1(0x5b);
+	/* retq */
+	EMIT1(0xc3);
+	/* If we were wrong about how much space we needed, scream and shout */
+	WARN_ON(cnt != 81);
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(0, cnt, 0, image);
+	bpf_jit_binary_lock_ro(header);
+	bpf_prog->list_func = (void *)image;
+	bpf_prog->jited_list = 1;
+	return;
+fail:
+	bpf_jit_binary_free(header);
+}
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
@@ -1176,6 +1339,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog->bpf_func = (void *)image;
 		prog->jited = 1;
 		prog->jited_len = proglen;
+		try_do_jit_list(prog, prog->bpf_func);
 	} else {
 		prog = orig_prog;
 	}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d813034e286..ad1e75bf0991 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,7 +517,8 @@ struct bpf_prog {
 					    const struct bpf_insn *insn);
 	/* Takes a list of struct bpf_work */
 	void			(*list_func)(struct list_head *list,
-					     const struct bpf_insn *insn);
+					     const struct bpf_insn *insn,
+					     const struct redirect_info *percpu_ri);
 	/* Instructions for interpreter */
 	union {
 		struct sock_filter	insns[0];
@@ -532,7 +533,7 @@ struct sk_filter {
 };
 
 #define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
-#define BPF_LIST_PROG_RUN(filter, list) (*(filter)->list_func)(list, (filter)->insnsi)
+#define BPF_LIST_PROG_RUN(filter, list, percpu) (*(filter)->list_func)(list, (filter)->insnsi, percpu)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -638,10 +639,11 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 }
 
 static __always_inline void bpf_list_prog_run_xdp(const struct bpf_prog *prog,
-						  struct list_head *list)
+						  struct list_head *list,
+						  const struct redirect_info *percpu_ri)
 {
 	/* Caller must hold rcu_read_lock(), as per bpf_prog_run_xdp(). */
-	BPF_LIST_PROG_RUN(prog, list);
+	BPF_LIST_PROG_RUN(prog, list, percpu_ri);
 }
 
 static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
@@ -756,6 +758,15 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
+static inline struct bpf_binary_header *
+bpf_list_jit_binary_hdr(const struct bpf_prog *fp)
+{
+	unsigned long real_start = (unsigned long)fp->list_func;
+	unsigned long addr = real_start & PAGE_MASK;
+
+	return (void *)addr;
+}
+
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c35da826cc3b..028be88c4af8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -621,15 +621,22 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
  */
 void __weak bpf_jit_free(struct bpf_prog *fp)
 {
-	if (fp->jited) {
-		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+	struct bpf_binary_header *hdr;
 
+	if (fp->jited) {
+		hdr = bpf_jit_binary_hdr(fp);
 		bpf_jit_binary_unlock_ro(hdr);
 		bpf_jit_binary_free(hdr);
 
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
 	}
 
+	if (fp->jited_list) {
+		hdr = bpf_list_jit_binary_hdr(fp);
+		bpf_jit_binary_unlock_ro(hdr);
+		bpf_jit_binary_free(hdr);
+	}
+
 	bpf_prog_unlock_free(fp);
 }
 
@@ -1358,13 +1365,13 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
 
 #define LIST_PROG_NAME(stack_size) __bpf_list_prog_run##stack_size
 #define DEFINE_BPF_LIST_PROG_RUN(stack_size) \
-static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn) \
+static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn, const struct redirect_info *percpu_ri) \
 { \
 	struct bpf_work *work; \
 \
 	list_for_each_entry(work, list, list) { \
 		work->ret = PROG_NAME(stack_size)(work->ctx, insn); \
-		work->ri = *this_cpu_ptr(&redirect_info); \
+		work->ri = *percpu_ri; \
 	} \
 }
 
@@ -1398,7 +1405,8 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 #undef PROG_NAME_LIST
 #define PROG_NAME_LIST(stack_size) LIST_PROG_NAME(stack_size),
 static void (*list_interpreters[])(struct list_head *list,
-				   const struct bpf_insn *insn) = {
+				   const struct bpf_insn *insn,
+				   const struct redirect_info *percpu_ri) = {
 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
diff --git a/net/core/dev.c b/net/core/dev.c
index 746112c22afd..7c1879045ef8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4214,6 +4214,7 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
 				struct sk_buff_head *list,
 				struct sk_buff_head *pass_list)
 {
+	const struct redirect_info *percpu_ri = this_cpu_ptr(&redirect_info);
 	struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
 	struct bpf_work *bw;
 	struct sk_buff *skb;
@@ -4249,11 +4250,11 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
 
 	if (xdp_prog->list_func && (xdp_prog->jited_list ||
 				    !xdp_prog->jited))
-		bpf_list_prog_run_xdp(xdp_prog, &xdp_list);
+		bpf_list_prog_run_xdp(xdp_prog, &xdp_list, percpu_ri);
 	else
 		list_for_each_entry(bw, &xdp_list, list) {
 			bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
-			bw->ri = *this_cpu_ptr(&redirect_info);
+			bw->ri = *percpu_ri;
 		}
 
 	for (i = 0; i < n; i++) {
