Message-ID: <CAEf4BzbJRf-+_GE4r2+mk0FjT96Qszx3ru9wEfieP_zr6p6dOw@mail.gmail.com>
Date: Fri, 11 Dec 2020 12:58:17 -0800
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Gary Lin <glin@...e.com>
Cc: Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Eric Dumazet <eric.dumazet@...il.com>,
andreas.taschner@...e.com
Subject: Re: [PATCH] bpf,x64: pad NOPs to make images converge more easily
On Fri, Dec 11, 2020 at 8:51 AM Gary Lin <glin@...e.com> wrote:
>
> The x64 bpf jit expects bpf images to converge within the given number of
> passes, but it can fail to do so in some corner cases. For example:
>
> l0: ldh [4]
> l1: jeq #0x537d, l2, l40
> l2: ld [0]
> l3: jeq #0xfa163e0d, l4, l40
> l4: ldh [12]
> l5: ldx #0xe
> l6: jeq #0x86dd, l41, l7
> l8: ld [x+16]
> l9: ja 41
>
> [... repeated ja 41 ]
>
> l40: ja 41
> l41: ret #0
> l42: ld #len
> l43: ret a
>
> This bpf program contains 32 "ja 41" instructions which are effectively
> NOPs and are designed to be replaced with valid code dynamically. Ideally,
> the bpf jit should optimize those "ja 41" instructions out when translating
> the bpf instructions into x86_64 machine code. However, do_jit() can
> only remove one "ja 41" for offset==0 on each pass, so it takes at
> least 32 passes to eliminate those JMPs, which exceeds the current limit
> of passes (20). In the end, the program gets rejected when BPF_JIT_ALWAYS_ON
> is set, even though it is legitimate as a classic socket filter.
>
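For context, the pre-patch nop-jump optimization in do_jit() that causes
this is simply (quoting the code this patch touches below):

	if (!jmp_offset)
		/* Optimize out nop jumps */
		break;

and, if I read do_jit() right, the one-removal-per-pass behavior follows
from the addrs[] bookkeeping: a forward jump computes jmp_offset from
addrs[] values written in the *previous* pass, so removing the "ja 41"
at l40 in pass N only makes the jump at l39 see offset 0 in pass N+1,
and so on down the chain.
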
> To make the image more likely to converge within 20 passes, this commit
> pads some instructions with NOPs in the last 5 passes:
>
> 1. conditional jumps
> A possible size variance comes from the adoption of the imm8 JMP encoding.
> If the offset fits in imm8, we calculate the size difference of this BPF
> instruction between the previous pass and the current pass and fill the
> gap with NOPs. To avoid recalculating the jump offset, those NOPs are
> inserted before the JMP code, so we have to subtract the 2 bytes of the
> imm8 JMP when calculating the number of NOPs.
>
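To make that bookkeeping concrete: INSN_SZ_DIFF below is the size this
insn had in the previous pass minus the bytes already emitted for it in
the current pass. With hypothetical sizes (a jcc whose offset newly fits
in imm8), the padding works out like this:

	/* Hypothetical sizes: the compare part of this insn is 4 bytes
	 * in both passes, and the jcc shrank from imm32 (6 bytes) to
	 * imm8 (2 bytes):
	 *
	 *   addrs[i] - addrs[i - 1] = 10  size recorded in the previous pass
	 *   prog - temp             =  4  cmp already emitted in this pass
	 *   INSN_SZ_DIFF            =  6
	 *   INSN_SZ_DIFF - 2        =  4  NOP bytes, as the imm8 jcc is 2 bytes
	 *
	 * 4 (cmp) + 4 (NOPs) + 2 (jcc) = 10 bytes again, so addrs[] and
	 * all previously computed jump offsets stay valid.
	 */
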
> 2. BPF_JA
> There are two cases for BPF_JA.
> a.) nop jumps
> If this instruction was not optimized out in the previous pass,
> instead of removing it, we insert the equivalent size of NOPs.
> b.) label jumps
> Similar to conditional jumps, we prepend the NOPs right before the JMP
> code.
>
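For case a.), the same INSN_SZ_DIFF bookkeeping applies: a "ja 41" that
was a 2-byte JMP ("EB xx") in the previous pass and has offset 0 now is
replaced in place rather than dropped (hypothetical sizes again):

	/* addrs[i] - addrs[i - 1] = 2  the old "EB xx" from the last pass
	 * prog - temp             = 0  nothing emitted for this insn yet
	 *
	 * so emit_nops(&prog, INSN_SZ_DIFF) emits 2 NOP bytes where the
	 * JMP used to be, and nothing after it shifts between passes.
	 */
	if (jmp_padding)
		cnt += emit_nops(&prog, INSN_SZ_DIFF);
	break;
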
> To keep the code concise, emit_nops() is modified to take a signed len and
> to return the number of inserted NOPs.
>
> To support bpf-to-bpf calls, a new flag, padded, is introduced to
> 'struct bpf_prog' so that bpf_int_jit_compile() can tell whether the
> program has already been padded.
>
> Signed-off-by: Gary Lin <glin@...e.com>
> ---
> arch/x86/net/bpf_jit_comp.c | 68 ++++++++++++++++++++++++-------------
> include/linux/filter.h | 1 +
> 2 files changed, 45 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 796506dcfc42..30b81c8539b3 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -789,8 +789,31 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
> }
> }
>
> +static int emit_nops(u8 **pprog, int len)
> +{
> + u8 *prog = *pprog;
> + int i, noplen, cnt = 0;
> +
> + while (len > 0) {
> + noplen = len;
> +
> + if (noplen > ASM_NOP_MAX)
> + noplen = ASM_NOP_MAX;
> +
> + for (i = 0; i < noplen; i++)
> + EMIT1(ideal_nops[noplen][i]);
> + len -= noplen;
> + }
> +
> + *pprog = prog;
> +
> + return cnt;
Isn't cnt always zero? I guess it was supposed to be `cnt = len` at
the beginning?
But then that raises the question of how this patch was actually tested,
given that emit_nops() returns the wrong answer. Changes like this should
definitely come with tests.
> +}
> +
> +#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
> +
> static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
> - int oldproglen, struct jit_context *ctx)
> + int oldproglen, struct jit_context *ctx, bool jmp_padding)
> {
> bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
> struct bpf_insn *insn = bpf_prog->insnsi;
> @@ -1409,6 +1432,8 @@ xadd: if (is_imm8(insn->off))
> }
> jmp_offset = addrs[i + insn->off] - addrs[i];
> if (is_imm8(jmp_offset)) {
> + if (jmp_padding)
> + cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
> EMIT2(jmp_cond, jmp_offset);
> } else if (is_simm32(jmp_offset)) {
> EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
> @@ -1431,11 +1456,19 @@ xadd: if (is_imm8(insn->off))
> else
> jmp_offset = addrs[i + insn->off] - addrs[i];
>
> - if (!jmp_offset)
> - /* Optimize out nop jumps */
> + if (!jmp_offset) {
> + /*
> + * If jmp_padding is enabled, the extra nops will
> + * be inserted. Otherwise, optimize out nop jumps.
> + */
> + if (jmp_padding)
> + cnt += emit_nops(&prog, INSN_SZ_DIFF);
> break;
> + }
> emit_jmp:
> if (is_imm8(jmp_offset)) {
> + if (jmp_padding)
> + cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
> EMIT2(0xEB, jmp_offset);
> } else if (is_simm32(jmp_offset)) {
> EMIT1_off32(0xE9, jmp_offset);
> @@ -1578,26 +1611,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> return 0;
> }
>
> -static void emit_nops(u8 **pprog, unsigned int len)
> -{
> - unsigned int i, noplen;
> - u8 *prog = *pprog;
> - int cnt = 0;
> -
> - while (len > 0) {
> - noplen = len;
> -
> - if (noplen > ASM_NOP_MAX)
> - noplen = ASM_NOP_MAX;
> -
> - for (i = 0; i < noplen; i++)
> - EMIT1(ideal_nops[noplen][i]);
> - len -= noplen;
> - }
> -
> - *pprog = prog;
> -}
> -
> static void emit_align(u8 **pprog, u32 align)
> {
> u8 *target, *prog = *pprog;
> @@ -1972,6 +1985,9 @@ struct x64_jit_data {
> struct jit_context ctx;
> };
>
> +#define MAX_PASSES 20
> +#define PADDING_PASSES (MAX_PASSES - 5)
> +
> struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> {
> struct bpf_binary_header *header = NULL;
> @@ -1981,6 +1997,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> struct jit_context ctx = {};
> bool tmp_blinded = false;
> bool extra_pass = false;
> + bool padding = prog->padded;
Can this ever be true on assignment? I.e., can the program be jitted twice?
> u8 *image = NULL;
> int *addrs;
> int pass;
> @@ -2043,7 +2060,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> * pass to emit the final image.
> */
> for (pass = 0; pass < 20 || image; pass++) {
> - proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
> + if (!padding && pass >= PADDING_PASSES)
> + padding = true;
Just do this unconditionally:
padding = pass >= PADDING_PASSES;
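i.e., assuming prog->padded can't already be set on entry (see the
question above about double-jitting), the loop would become (sketch):

	for (pass = 0; pass < MAX_PASSES || image; pass++) {
		padding = pass >= PADDING_PASSES;
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding);
		...
	}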
> + proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding);
> if (proglen <= 0) {
> out_image:
> image = NULL;
> @@ -2101,6 +2120,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> prog->bpf_func = (void *)image;
> prog->jited = 1;
> prog->jited_len = proglen;
> + prog->padded = padding;
> } else {
> prog = orig_prog;
> }
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 1b62397bd124..cb7ce2b3737a 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -531,6 +531,7 @@ struct bpf_prog {
> dst_needed:1, /* Do we need dst entry? */
> blinded:1, /* Was blinded */
> is_func:1, /* program is a bpf function */
> + padded:1, /* jitted image was padded */
> kprobe_override:1, /* Do we override a kprobe? */
> has_callchain_buf:1, /* callchain buffer allocated? */
> enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
> --
> 2.29.2
>