Message-ID: <CAEf4BzZw+jX13JuiVueKhpufQ9qHEBc0xYtqKdhhUV00afx0Gw@mail.gmail.com>
Date: Mon, 18 Nov 2019 11:36:12 -0800
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Björn Töpel <bjorn.topel@...il.com>
Cc: Networking <netdev@...r.kernel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Björn Töpel <bjorn.topel@...el.com>,
bpf <bpf@...r.kernel.org>,
Magnus Karlsson <magnus.karlsson@...il.com>,
Magnus Karlsson <magnus.karlsson@...el.com>,
Jonathan Lemon <jonathan.lemon@...il.com>
Subject: Re: [RFC PATCH bpf-next 2/4] bpf: introduce BPF dispatcher
On Wed, Nov 13, 2019 at 12:48 PM Björn Töpel <bjorn.topel@...il.com> wrote:
>
> From: Björn Töpel <bjorn.topel@...el.com>
>
> The BPF dispatcher builds on top of the BPF trampoline ideas; it
> introduces bpf_arch_text_poke() and (re-)uses the BPF JIT code
> generation. The dispatcher builds a dispatch table for XDP programs,
> for retpoline avoidance. The table is a simple binary search model,
> so lookup is O(log n). Here, the dispatch table is limited to four
> entries (for laziness reasons -- only 1B relative jumps :-P). If the
> dispatch table is full, it falls back to the retpoline path.
>
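Just to check my reading of the generated code: for, say, four programs
with sorted JITed addresses ip0 < ip1 < ip2 < ip3, the emitted dispatcher
is logically something like this C sketch (not the actual JIT output;
'tgt' is the program address the caller passes in %rdx):

    if (tgt <= ip1) {
        if (tgt == ip0)
            goto jmp_ip0;           /* direct jmp, no retpoline */
        if (tgt == ip1)
            goto jmp_ip1;
    } else {
        if (tgt == ip2)
            goto jmp_ip2;
        if (tgt == ip3)
            goto jmp_ip3;
    }
    goto retpoline_fallback;        /* __x86_indirect_thunk_rdx */

Is that right?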
> An example: A module/driver allocates a dispatcher. The dispatcher is
> shared for all netdevs. Each netdev allocates a slot in the dispatcher
> and a BPF program. The netdev then uses the dispatcher to call the
> correct program with a direct call (actually a tail-call).
>
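A tiny usage sketch in the changelog might help here; as I read it, the
driver side boils down to (hypothetical variable names, just to confirm
I got the intent):

    /* swap old_prog for new_prog in the shared dispatcher;
     * either one may be NULL for pure attach/detach
     */
    bpf_dispatcher_change_prog(dispatcher_func, old_prog, new_prog);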
> Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
> ---
> arch/x86/net/bpf_jit_comp.c | 96 ++++++++++++++++++
> kernel/bpf/Makefile | 1 +
> kernel/bpf/dispatcher.c | 197 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 294 insertions(+)
> create mode 100644 kernel/bpf/dispatcher.c
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 28782a1c386e..d75aebf508b8 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -10,10 +10,12 @@
> #include <linux/if_vlan.h>
> #include <linux/bpf.h>
> #include <linux/memory.h>
> +#include <linux/sort.h>
> #include <asm/extable.h>
> #include <asm/set_memory.h>
> #include <asm/nospec-branch.h>
> #include <asm/text-patching.h>
> +#include <asm/asm-prototypes.h>
>
[...]
> +
> +int arch_prepare_bpf_dispatcher(void *image, struct bpf_prog **progs,
> + int num_progs)
> +{
> + u64 ips[BPF_DISPATCHER_MAX] = {};
> + u8 *fallback, *prog = image;
> + int i, err, cnt = 0;
> +
> + if (!num_progs || num_progs > BPF_DISPATCHER_MAX)
> + return -EINVAL;
> +
> + for (i = 0; i < num_progs; i++)
> + ips[i] = (u64)progs[i]->bpf_func;
> +
> + EMIT2(0xEB, 5); /* jmp rip+5 (skip retpoline) */
> + fallback = prog;
> + err = emit_jmp(&prog, /* jmp retpoline */
> + __x86_indirect_thunk_rdx, prog);
> + if (err)
> + return err;
> +
> + sort(&ips[0], num_progs, sizeof(ips[i]), cmp_ips, NULL);
nit: sizeof(ips[i]) looks weird...
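i.e. I'd write it as

    sort(ips, num_progs, sizeof(ips[0]), cmp_ips, NULL);

(or sizeof(*ips)); functionally the same, but it doesn't look like it
depends on i's value after the loop.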
> + return emit_bpf_dispatcher(&prog, 0, num_progs - 1, &ips[0], fallback);
> +}
> +
> struct x64_jit_data {
> struct bpf_binary_header *header;
> int *addrs;
[...]
> +
> +static int bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
> + struct bpf_prog *prog)
> +{
> + struct bpf_prog **entry = NULL;
> + int i, err = 0;
> +
> + if (d->num_progs == BPF_DISPATCHER_MAX)
> + return err;
err is 0 here, so a full dispatcher silently returns success -- probably
not what you want?
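Something like this, maybe (-E2BIG picked just as an example errno):

    if (d->num_progs == BPF_DISPATCHER_MAX)
        return -E2BIG;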
> +
> + for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
> + if (!entry && !d->progs[i])
> + entry = &d->progs[i];
> + if (d->progs[i] == prog)
> + return err;
> + }
> +
> + prog = bpf_prog_inc(prog);
> + if (IS_ERR(prog))
> + return err;
> +
> + *entry = prog;
> + d->num_progs++;
> + return err;
> +}
> +
> +static void bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
> + struct bpf_prog *prog)
> +{
> + int i;
> +
> + for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
> + if (d->progs[i] == prog) {
> + bpf_prog_put(prog);
> + d->progs[i] = NULL;
> + d->num_progs--;
instead of allowing holes, why not swap the removed prog with the last
one in d->progs?
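E.g. (sketch, after the bpf_prog_put() above):

    d->progs[i] = d->progs[d->num_progs - 1];
    d->progs[d->num_progs - 1] = NULL;
    d->num_progs--;

That keeps d->progs[] dense, so the first num_progs entries are always
valid when they get handed to arch_prepare_bpf_dispatcher().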
> + break;
> + }
> + }
> +}
> +
> +int __weak arch_prepare_bpf_dispatcher(void *image, struct bpf_prog **progs,
> + int num_ids)
> +{
> + return -ENOTSUPP;
> +}
> +
> +/* NB! bpf_dispatcher_update() might free the dispatcher! */
> +static int bpf_dispatcher_update(struct bpf_dispatcher *d)
> +{
> + void *old_image = d->image + ((d->selector + 1) & 1) * PAGE_SIZE / 2;
> + void *new_image = d->image + (d->selector & 1) * PAGE_SIZE / 2;
> + int err;
> +
> + if (d->num_progs == 0) {
> + err = bpf_arch_text_poke(d->func, BPF_MOD_JMP_TO_NOP,
> + old_image, NULL);
> + bpf_dispatcher_free(d);
> + goto out;
> + }
> +
> + err = arch_prepare_bpf_dispatcher(new_image, &d->progs[0],
> + d->num_progs);
> + if (err)
> + goto out;
> +
> + if (d->selector)
> + /* progs already running at this address */
> + err = bpf_arch_text_poke(d->func, BPF_MOD_JMP_TO_JMP,
> + old_image, new_image);
> + else
> + /* first time registering */
> + err = bpf_arch_text_poke(d->func, BPF_MOD_NOP_TO_JMP,
> + NULL, new_image);
> +
> + if (err)
> + goto out;
> + d->selector++;
> +
> +out:
> + return err;
> +}
> +
> +void bpf_dispatcher_change_prog(void *func, struct bpf_prog *from,
> + struct bpf_prog *to)
> +{
> + struct bpf_dispatcher *d;
> +
> + if (!from && !to)
> + return;
> +
> + mutex_lock(&dispatcher_mutex);
> + d = bpf_dispatcher_lookup(func);
> + if (!d)
> + goto out;
> +
> + if (from)
> + bpf_dispatcher_remove_prog(d, from);
> +
> + if (to)
> + bpf_dispatcher_add_prog(d, to);
this can fail, but the error is never propagated or checked here
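i.e. I'd expect something like:

    err = bpf_dispatcher_add_prog(d, to);
    if (err)
        goto out;

(once add_prog actually returns a real error, see above).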
> +
> + WARN_ON(bpf_dispatcher_update(d));
shouldn't the dispatcher be removed from the list before it's freed? It
seems like handling the dispatcher freeing is better done here explicitly
(and then you won't need to leave a NB remark)
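Roughly (sketch; I'm assuming the hlist node in struct bpf_dispatcher is
called 'hlist', that part isn't in this diff):

    err = bpf_dispatcher_update(d);
    WARN_ON(err);
    if (!d->num_progs) {
        hlist_del(&d->hlist);
        bpf_dispatcher_free(d);
    }

with bpf_dispatcher_update() itself never freeing the dispatcher.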
> +
> +out:
> + mutex_unlock(&dispatcher_mutex);
> +}
> +
> +static int __init init_dispatchers(void)
> +{
> + int i;
> +
> + for (i = 0; i < DISPATCHER_TABLE_SIZE; i++)
> + INIT_HLIST_HEAD(&dispatcher_table[i]);
> + return 0;
> +}
> +late_initcall(init_dispatchers);
> +
> +#endif
> --
> 2.20.1
>