lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 23 May 2022 19:15:03 -0700
From:   Stanislav Fomichev <sdf@...gle.com>
To:     Martin KaFai Lau <kafai@...com>
Cc:     netdev@...r.kernel.org, bpf@...r.kernel.org, ast@...nel.org,
        daniel@...earbox.net, andrii@...nel.org
Subject: Re: [PATCH bpf-next v7 03/11] bpf: per-cgroup lsm flavor

,

On Fri, May 20, 2022 at 5:53 PM Martin KaFai Lau <kafai@...com> wrote:
>
> On Wed, May 18, 2022 at 03:55:23PM -0700, Stanislav Fomichev wrote:
>
> [ ... ]
>
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index ea3674a415f9..70cf1dad91df 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -768,6 +768,10 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
> >  u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
> >  void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> >                                      struct bpf_tramp_run_ctx *run_ctx);
> > +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
> > +                                     struct bpf_tramp_run_ctx *run_ctx);
> > +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
> > +                                     struct bpf_tramp_run_ctx *run_ctx);
> >  void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
> >  void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
> >
> > @@ -1035,6 +1039,7 @@ struct bpf_prog_aux {
> >       u64 load_time; /* ns since boottime */
> >       u32 verified_insns;
> >       struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
> > +     int cgroup_atype; /* enum cgroup_bpf_attach_type */
> >       char name[BPF_OBJ_NAME_LEN];
> >  #ifdef CONFIG_SECURITY
> >       void *security;
> > @@ -1107,6 +1112,12 @@ struct bpf_tramp_link {
> >       u64 cookie;
> >  };
> >
> > +struct bpf_shim_tramp_link {
> > +     struct bpf_tramp_link tramp_link;
> > +     struct bpf_trampoline *tr;
> > +     atomic64_t refcnt;
> There is already a refcnt in 'struct bpf_link'.
> Reuse that one if possible.

I was assuming that having a per-bpf_shim_tramp_link refcnt might be
more readable. I'll switch to the one from bpf_link per the comments
below.

> [ ... ]
>
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index 01ce78c1df80..c424056f0b35 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> > @@ -11,6 +11,8 @@
> >  #include <linux/rcupdate_wait.h>
> >  #include <linux/module.h>
> >  #include <linux/static_call.h>
> > +#include <linux/bpf_verifier.h>
> > +#include <linux/bpf_lsm.h>
> >
> >  /* dummy _ops. The verifier will operate on target program's ops. */
> >  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> > @@ -497,6 +499,163 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
> >       return err;
> >  }
> >
> > +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> > +static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
> > +                                                  bpf_func_t bpf_func)
> > +{
> > +     struct bpf_shim_tramp_link *shim_link = NULL;
> > +     struct bpf_prog *p;
> > +
> > +     shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
> > +     if (!shim_link)
> > +             return NULL;
> > +
> > +     p = bpf_prog_alloc(1, 0);
> > +     if (!p) {
> > +             kfree(shim_link);
> > +             return NULL;
> > +     }
> > +
> > +     p->jited = false;
> > +     p->bpf_func = bpf_func;
> > +
> > +     p->aux->cgroup_atype = prog->aux->cgroup_atype;
> > +     p->aux->attach_func_proto = prog->aux->attach_func_proto;
> > +     p->aux->attach_btf_id = prog->aux->attach_btf_id;
> > +     p->aux->attach_btf = prog->aux->attach_btf;
> > +     btf_get(p->aux->attach_btf);
> > +     p->type = BPF_PROG_TYPE_LSM;
> > +     p->expected_attach_type = BPF_LSM_MAC;
> > +     bpf_prog_inc(p);
> > +     bpf_link_init(&shim_link->tramp_link.link, BPF_LINK_TYPE_TRACING, NULL, p);
> > +     atomic64_set(&shim_link->refcnt, 1);
> > +
> > +     return shim_link;
> > +}
> > +
> > +static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
> > +                                                 bpf_func_t bpf_func)
> > +{
> > +     struct bpf_tramp_link *link;
> > +     int kind;
> > +
> > +     for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> > +             hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
> > +                     struct bpf_prog *p = link->link.prog;
> > +
> > +                     if (p->bpf_func == bpf_func)
> > +                             return container_of(link, struct bpf_shim_tramp_link, tramp_link);
> > +             }
> > +     }
> > +
> > +     return NULL;
> > +}
> > +
> > +static void cgroup_shim_put(struct bpf_shim_tramp_link *shim_link)
> > +{
> > +     if (shim_link->tr)
> I have been spinning back and forth with this "shim_link->tr" test and
> the "!shim_link->tr" test below with an atomic64_dec_and_test() test
> in between  :)

I did this dance so I can call cgroup_shim_put from
bpf_trampoline_link_cgroup_shim, I guess that's confusing.
bpf_trampoline_link_cgroup_shim can call cgroup_shim_put when
__bpf_trampoline_link_prog fails (shim_link->tr==NULL);
cgroup_shim_put can also be called to unlink the prog from the
trampoline (shim_link->tr!=NULL).

> > +             bpf_trampoline_put(shim_link->tr);
> Why put(tr) here?
>
> Intuitive thinking is that should be done after __bpf_trampoline_unlink_prog(.., tr)
> which is still using the tr.
> or I missed something inside __bpf_trampoline_unlink_prog(..., tr) ?
>
> > +
> > +     if (!atomic64_dec_and_test(&shim_link->refcnt))
> > +             return;
> > +
> > +     if (!shim_link->tr)
> And this is only for the error case in bpf_trampoline_link_cgroup_shim()?
> Can it be handled locally in bpf_trampoline_link_cgroup_shim()
> where it could actually happen ?

Yeah, agreed, I'll move the cleanup path to
bpf_trampoline_link_cgroup_shim to make it less confusing here.

> > +             return;
> > +
> > +     WARN_ON_ONCE(__bpf_trampoline_unlink_prog(&shim_link->tramp_link, shim_link->tr));
> > +     kfree(shim_link);
> How about shim_link->tramp_link.link.prog, is the prog freed ?
>
> Considering the bpf_link_put() does bpf_prog_put(link->prog).
> Is there a reason the bpf_link_put() not used and needs to
> manage its own shim_link->refcnt here ?

Good catch, I've missed the bpf_prog_put(link->prog) part. Let me see
if I can use the link's refcnt, it seems like I can define my own
link->ops->dealloc to call __bpf_trampoline_unlink_prog and the rest
will be taken care of.

> > +}
> > +
> > +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> > +                                 struct bpf_attach_target_info *tgt_info)
> > +{
> > +     struct bpf_shim_tramp_link *shim_link = NULL;
> > +     struct bpf_trampoline *tr;
> > +     bpf_func_t bpf_func;
> > +     u64 key;
> > +     int err;
> > +
> > +     key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> > +                                      prog->aux->attach_btf_id);
> > +
> > +     err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> > +     if (err)
> > +             return err;
> > +
> > +     tr = bpf_trampoline_get(key, tgt_info);
> > +     if (!tr)
> > +             return  -ENOMEM;
> > +
> > +     mutex_lock(&tr->mutex);
> > +
> > +     shim_link = cgroup_shim_find(tr, bpf_func);
> > +     if (shim_link) {
> > +             /* Reusing existing shim attached by the other program. */
> > +             atomic64_inc(&shim_link->refcnt);
> > +             /* note, we're still holding tr refcnt from above */
> hmm... why it still needs to hold the tr refcnt ?

I'm assuming we need to hold the trampoline for as long as shim_prog
is attached to it, right? Otherwise it gets kfreed.



> > +
> > +             mutex_unlock(&tr->mutex);
> > +             return 0;
> > +     }
> > +
> > +     /* Allocate and install new shim. */
> > +
> > +     shim_link = cgroup_shim_alloc(prog, bpf_func);
> > +     if (!shim_link) {
> > +             bpf_trampoline_put(tr);
> > +             err = -ENOMEM;
> > +             goto out;
> > +     }
> > +
> > +     err = __bpf_trampoline_link_prog(&shim_link->tramp_link, tr);
> > +     if (err)
> > +             goto out;
> > +
> > +     shim_link->tr = tr;
> > +
> > +     mutex_unlock(&tr->mutex);
> > +
> > +     return 0;
> > +out:
> > +     mutex_unlock(&tr->mutex);
> > +
> > +     if (shim_link)
> > +             cgroup_shim_put(shim_link);
> > +
> > +     return err;
> > +}
> > +

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ