netdev - Re: [PATCH] bpf,x64: pad NOPs to make images converge more easily

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a9a00c89-3716-2296-d0d9-bba944e2cd82@iogearbox.net>
Date:   Fri, 11 Dec 2020 22:13:12 +0100
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Andrii Nakryiko <andrii.nakryiko@...il.com>,
        Gary Lin <glin@...e.com>
Cc:     Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Eric Dumazet <eric.dumazet@...il.com>,
        andreas.taschner@...e.com
Subject: Re: [PATCH] bpf,x64: pad NOPs to make images converge more easily

On 12/11/20 9:58 PM, Andrii Nakryiko wrote:
> On Fri, Dec 11, 2020 at 8:51 AM Gary Lin <glin@...e.com> wrote:
>>
[...]
>> +static int emit_nops(u8 **pprog, int len)
>> +{
>> +       u8 *prog = *pprog;
>> +       int i, noplen, cnt = 0;
>> +
>> +       while (len > 0) {
>> +               noplen = len;
>> +
>> +               if (noplen > ASM_NOP_MAX)
>> +                       noplen = ASM_NOP_MAX;
>> +
>> +               for (i = 0; i < noplen; i++)
>> +                       EMIT1(ideal_nops[noplen][i]);
>> +               len -= noplen;
>> +       }
>> +
>> +       *pprog = prog;
>> +
>> +       return cnt;
> 
> Isn't cnt always zero? I guess it was supposed to be `cnt = len` at
> the beginning?

EMIT*() macros from the JIT will bump cnt internally.

> But then it begs the question how this patch was actually tested given
> emit_nops() is returning wrong answers? Changes like this should
> definitely come with tests.

Yeah, applying a change like this without tests for the corner cases it is trying
to fix would be no go, so they are definitely needed, ideally also with disasm dump
of the relevant code and detailed analysis to show why it's bullet-proof.

>> +#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
>> +
>>   static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>> -                 int oldproglen, struct jit_context *ctx)
>> +                 int oldproglen, struct jit_context *ctx, bool jmp_padding)
>>   {
>>          bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
>>          struct bpf_insn *insn = bpf_prog->insnsi;
>> @@ -1409,6 +1432,8 @@ xadd:                     if (is_imm8(insn->off))
>>                          }
>>                          jmp_offset = addrs[i + insn->off] - addrs[i];
>>                          if (is_imm8(jmp_offset)) {
>> +                               if (jmp_padding)
>> +                                       cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
>>                                  EMIT2(jmp_cond, jmp_offset);
>>                          } else if (is_simm32(jmp_offset)) {
>>                                  EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
>> @@ -1431,11 +1456,19 @@ xadd:                   if (is_imm8(insn->off))
>>                          else
>>                                  jmp_offset = addrs[i + insn->off] - addrs[i];
>>
>> -                       if (!jmp_offset)
>> -                               /* Optimize out nop jumps */
>> +                       if (!jmp_offset) {
>> +                               /*
>> +                                * If jmp_padding is enabled, the extra nops will
>> +                                * be inserted. Otherwise, optimize out nop jumps.
>> +                                */
>> +                               if (jmp_padding)
>> +                                       cnt += emit_nops(&prog, INSN_SZ_DIFF);
>>                                  break;
>> +                       }
>>   emit_jmp:
>>                          if (is_imm8(jmp_offset)) {
>> +                               if (jmp_padding)
>> +                                       cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
>>                                  EMIT2(0xEB, jmp_offset);
>>                          } else if (is_simm32(jmp_offset)) {
>>                                  EMIT1_off32(0xE9, jmp_offset);
>> @@ -1578,26 +1611,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>>          return 0;
>>   }
>>
>> -static void emit_nops(u8 **pprog, unsigned int len)
>> -{
>> -       unsigned int i, noplen;
>> -       u8 *prog = *pprog;
>> -       int cnt = 0;
>> -
>> -       while (len > 0) {
>> -               noplen = len;
>> -
>> -               if (noplen > ASM_NOP_MAX)
>> -                       noplen = ASM_NOP_MAX;
>> -
>> -               for (i = 0; i < noplen; i++)
>> -                       EMIT1(ideal_nops[noplen][i]);
>> -               len -= noplen;
>> -       }
>> -
>> -       *pprog = prog;
>> -}
>> -
>>   static void emit_align(u8 **pprog, u32 align)
>>   {
>>          u8 *target, *prog = *pprog;
>> @@ -1972,6 +1985,9 @@ struct x64_jit_data {
>>          struct jit_context ctx;
>>   };
>>
>> +#define MAX_PASSES 20
>> +#define PADDING_PASSES (MAX_PASSES - 5)
>> +
>>   struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>>   {
>>          struct bpf_binary_header *header = NULL;
>> @@ -1981,6 +1997,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>>          struct jit_context ctx = {};
>>          bool tmp_blinded = false;
>>          bool extra_pass = false;
>> +       bool padding = prog->padded;
> 
> can this ever be true on assignment? I.e., can the program be jitted twice?

Yes, progs can be passed into the JIT twice, see also jit_subprogs(). In one of
the earlier patches it would still potentially change the image size a second
time which would break subprogs aka bpf2bpf calls.

>>          u8 *image = NULL;
>>          int *addrs;
>>          int pass;
>> @@ -2043,7 +2060,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>>           * pass to emit the final image.
>>           */
>>          for (pass = 0; pass < 20 || image; pass++) {
>> -               proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
>> +               if (!padding && pass >= PADDING_PASSES)
>> +                       padding = true;
> 
[...]