Message-ID: <20220506230244.4t4p5gnbgg6i4tht@kafai-mbp.dhcp.thefacebook.com>
Date: Fri, 6 May 2022 16:02:44 -0700
From: Martin KaFai Lau <kafai@...com>
To: Stanislav Fomichev <sdf@...gle.com>
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org, ast@...nel.org,
daniel@...earbox.net, andrii@...nel.org
Subject: Re: [PATCH bpf-next v6 03/10] bpf: per-cgroup lsm flavor
On Fri, Apr 29, 2022 at 02:15:33PM -0700, Stanislav Fomichev wrote:
> diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
> index 064eccba641d..a0e68ef5dfb1 100644
> --- a/kernel/bpf/bpf_lsm.c
> +++ b/kernel/bpf/bpf_lsm.c
> @@ -16,6 +16,7 @@
> #include <linux/bpf_local_storage.h>
> #include <linux/btf_ids.h>
> #include <linux/ima.h>
> +#include <linux/bpf-cgroup.h>
>
> /* For every LSM hook that allows attachment of BPF programs, declare a nop
> * function where a BPF program can be attached.
> @@ -35,6 +36,66 @@ BTF_SET_START(bpf_lsm_hooks)
> #undef LSM_HOOK
> BTF_SET_END(bpf_lsm_hooks)
>
> +/* List of LSM hooks that should operate on 'current' cgroup regardless
> + * of function signature.
> + */
> +BTF_SET_START(bpf_lsm_current_hooks)
> +/* operate on freshly allocated sk without any cgroup association */
> +BTF_ID(func, bpf_lsm_sk_alloc_security)
> +BTF_ID(func, bpf_lsm_sk_free_security)
> +BTF_SET_END(bpf_lsm_current_hooks)
> +
> +int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> + bpf_func_t *bpf_func)
> +{
> + const struct btf_type *first_arg_type;
> + const struct btf_type *sock_type;
> + const struct btf_type *sk_type;
> + const struct btf *btf_vmlinux;
> + const struct btf_param *args;
> + s32 type_id;
> +
> + if (!prog->aux->attach_func_proto ||
> + !btf_type_is_func_proto(prog->aux->attach_func_proto))
Also, remove these tests at attach time; they should have already
been checked at load time.
> + return -EINVAL;
> +
> + if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
> + btf_id_set_contains(&bpf_lsm_current_hooks,
> + prog->aux->attach_btf_id)) {
> + *bpf_func = __cgroup_bpf_run_lsm_current;
> + return 0;
> + }
> +
> + args = btf_params(prog->aux->attach_func_proto);
> +
> + btf_vmlinux = bpf_get_btf_vmlinux();
> +
> + type_id = btf_find_by_name_kind(btf_vmlinux, "socket", BTF_KIND_STRUCT);
> + if (type_id < 0)
> + return -EINVAL;
> + sock_type = btf_type_by_id(btf_vmlinux, type_id);
> +
> + type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
> + if (type_id < 0)
> + return -EINVAL;
> + sk_type = btf_type_by_id(btf_vmlinux, type_id);
Although a practical kconfig should have CONFIG_NET, I am not sure
btf has "socket" and "sock" in some unusual kconfig setups. If it
does not, this will return an error too early for the other hooks.
Maybe put them under "#ifdef CONFIG_NET"? While adding the
CONFIG_NET guard, it would be easier to just use btf_sock_ids[];
"socket" needs to be added to btf_sock_ids[]. Take a look at
btf_ids.h and filter.c.
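Untested sketch of what I mean (BTF_SOCK_TYPE_SOCKET is the new
entry that would need to be added next to the existing
BTF_SOCK_TYPE_* ids so that btf_sock_ids[] can resolve
"struct socket"):

#ifdef CONFIG_NET
	u32 arg_btf_id = 0;

	args = btf_params(prog->aux->attach_func_proto);

	/* The hook's first argument is a pointer, so compare the
	 * resolved pointee id against the prefetched btf_sock_ids[].
	 */
	btf_type_resolve_ptr(btf_vmlinux, args[0].type, &arg_btf_id);

	if (arg_btf_id == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
		*bpf_func = __cgroup_bpf_run_lsm_socket;
	else if (arg_btf_id == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
		*bpf_func = __cgroup_bpf_run_lsm_sock;
	else
#endif
		*bpf_func = __cgroup_bpf_run_lsm_current;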
> +
> + first_arg_type = btf_type_resolve_ptr(btf_vmlinux, args[0].type, NULL);
> + if (first_arg_type == sock_type)
> + *bpf_func = __cgroup_bpf_run_lsm_socket;
> + else if (first_arg_type == sk_type)
> + *bpf_func = __cgroup_bpf_run_lsm_sock;
> + else
> + *bpf_func = __cgroup_bpf_run_lsm_current;
> +
> + return 0;
> +}
[ ... ]
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 134785ab487c..9cc38454e402 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -14,6 +14,9 @@
> #include <linux/string.h>
> #include <linux/bpf.h>
> #include <linux/bpf-cgroup.h>
> +#include <linux/btf_ids.h>
> +#include <linux/bpf_lsm.h>
> +#include <linux/bpf_verifier.h>
> #include <net/sock.h>
> #include <net/bpf_sk_storage.h>
>
> @@ -61,6 +64,85 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
> return run_ctx.retval;
> }
>
> +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
> + const struct bpf_insn *insn)
> +{
> + const struct bpf_prog *shim_prog;
> + struct sock *sk;
> + struct cgroup *cgrp;
> + int ret = 0;
> + u64 *regs;
> +
> + regs = (u64 *)ctx;
> + sk = (void *)(unsigned long)regs[BPF_REG_0];
> + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> + if (likely(cgrp))
> + ret = bpf_prog_run_array_cg(&cgrp->bpf,
> + shim_prog->aux->cgroup_atype,
> + ctx, bpf_prog_run, 0, NULL);
> + return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
> + const struct bpf_insn *insn)
> +{
> + const struct bpf_prog *shim_prog;
> + struct socket *sock;
> + struct cgroup *cgrp;
> + int ret = 0;
> + u64 *regs;
> +
> + regs = (u64 *)ctx;
> + sock = (void *)(unsigned long)regs[BPF_REG_0];
> + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> + cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
> + if (likely(cgrp))
> + ret = bpf_prog_run_array_cg(&cgrp->bpf,
> + shim_prog->aux->cgroup_atype,
> + ctx, bpf_prog_run, 0, NULL);
> + return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
> + const struct bpf_insn *insn)
> +{
> + const struct bpf_prog *shim_prog;
> + struct cgroup *cgrp;
> + int ret = 0;
From lsm_hook_defs.h, there are some default return values that are
not 0 (e.g. inode_getsecurity defaults to -EOPNOTSUPP). Is it OK to
always return 0 in cases like an empty cgroup array?
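If a non-zero default is needed, one (untested) option is to stash
the LSM_HOOK() default on the shim when it is created and feed it
through the existing retval argument of bpf_prog_run_array_cg();
lsm_default_ret below is a hypothetical new bpf_prog_aux field:

	/* hypothetical field, populated from the lsm_hook_defs.h
	 * default when the shim is allocated
	 */
	int ret = shim_prog->aux->lsm_default_ret;

	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run,
					    shim_prog->aux->lsm_default_ret,
					    NULL);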
> +
> + if (unlikely(!current))
> + return 0;
> +
> + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> + rcu_read_lock();
> + cgrp = task_dfl_cgroup(current);
> + if (likely(cgrp))
> + ret = bpf_prog_run_array_cg(&cgrp->bpf,
> + shim_prog->aux->cgroup_atype,
> + ctx, bpf_prog_run, 0, NULL);
> + rcu_read_unlock();
> + return ret;
> +}
[ ... ]
> @@ -479,6 +572,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
> struct bpf_prog *old_prog = NULL;
> struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> + struct bpf_attach_target_info tgt_info = {};
> enum cgroup_bpf_attach_type atype;
> struct bpf_prog_list *pl;
> struct hlist_head *progs;
> @@ -495,9 +589,34 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
> /* replace_prog implies BPF_F_REPLACE, and vice versa */
> return -EINVAL;
>
> - atype = to_cgroup_bpf_attach_type(type);
> - if (atype < 0)
> - return -EINVAL;
> + if (type == BPF_LSM_CGROUP) {
> + struct bpf_prog *p = prog ? : link->link.prog;
nit: This "prog ? : link->link.prog" is done multiple times (in new
and existing code) in this function; maybe do it once at the very
beginning.
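e.g. (untested), near the top of __cgroup_bpf_attach():

	struct bpf_prog *new_prog = prog ? : link->link.prog;

and then use new_prog in the new BPF_LSM_CGROUP block as well as in
the existing spots below.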
> +
> + if (replace_prog) {
> + /* Reusing shim from the original program. */
> + if (replace_prog->aux->attach_btf_id !=
> + p->aux->attach_btf_id)
> + return -EINVAL;
> +
> + atype = replace_prog->aux->cgroup_atype;
> + } else {
> + err = bpf_check_attach_target(NULL, p, NULL,
> + p->aux->attach_btf_id,
> + &tgt_info);
> + if (err)
> + return -EINVAL;
return err;
> +
> + atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
> + if (atype < 0)
> + return atype;
> + }
> +
> + p->aux->cgroup_atype = atype;
> + } else {
> + atype = to_cgroup_bpf_attach_type(type);
> + if (atype < 0)
> + return -EINVAL;
> + }
>
> progs = &cgrp->bpf.progs[atype];
[ ... ]
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 0c4fd194e801..77dfa517c47c 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -11,6 +11,8 @@
> #include <linux/rcupdate_wait.h>
> #include <linux/module.h>
> #include <linux/static_call.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf_lsm.h>
>
> /* dummy _ops. The verifier will operate on target program's ops. */
> const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -485,6 +487,147 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
> return err;
> }
>
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> +static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog,
> + bpf_func_t bpf_func)
> +{
> + struct bpf_prog *p;
> +
> + p = bpf_prog_alloc(1, 0);
> + if (!p)
> + return NULL;
> +
> + p->jited = false;
> + p->bpf_func = bpf_func;
> +
> + p->aux->cgroup_atype = prog->aux->cgroup_atype;
> + p->aux->attach_func_proto = prog->aux->attach_func_proto;
> + p->aux->attach_btf_id = prog->aux->attach_btf_id;
> + p->aux->attach_btf = prog->aux->attach_btf;
> + btf_get(p->aux->attach_btf);
> + p->type = BPF_PROG_TYPE_LSM;
> + p->expected_attach_type = BPF_LSM_MAC;
> + bpf_prog_inc(p);
> +
> + return p;
> +}
> +
> +static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr,
> + bpf_func_t bpf_func)
> +{
> + const struct bpf_prog_aux *aux;
> + int kind;
> +
> + for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> + hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
> + struct bpf_prog *p = aux->prog;
> +
> + if (p->bpf_func == bpf_func)
> + return p;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> + struct bpf_attach_target_info *tgt_info)
> +{
> + struct bpf_prog *shim_prog = NULL;
> + struct bpf_trampoline *tr;
> + bpf_func_t bpf_func;
> + u64 key;
> + int err;
> +
> + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> + prog->aux->attach_btf_id);
> +
> + err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> + if (err)
> + return err;
> +
> + tr = bpf_trampoline_get(key, tgt_info);
> + if (!tr)
> + return -ENOMEM;
> +
> + mutex_lock(&tr->mutex);
> +
> + shim_prog = cgroup_shim_find(tr, bpf_func);
> + if (shim_prog) {
> + /* Reusing existing shim attached by the other program. */
> + bpf_prog_inc(shim_prog);
> + mutex_unlock(&tr->mutex);
> + return 0;
> + }
> +
> + /* Allocate and install new shim. */
> +
> + shim_prog = cgroup_shim_alloc(prog, bpf_func);
> + if (!shim_prog) {
> + err = -ENOMEM;
> + goto out;
> + }
> +
> + err = __bpf_trampoline_link_prog(shim_prog, tr);
> + if (err)
> + goto out;
> +
> + mutex_unlock(&tr->mutex);
> +
> + return 0;
> +out:
> + if (shim_prog)
> + bpf_prog_put(shim_prog);
> +
bpf_trampoline_get() was done earlier. Does it need to call
bpf_trampoline_put(tr) in the error cases? More on the tr refcnt
later.
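i.e. something like (untested):

out:
	if (shim_prog)
		bpf_prog_put(shim_prog);
	mutex_unlock(&tr->mutex);
	bpf_trampoline_put(tr); /* pairs with bpf_trampoline_get() */
	return err;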
> + mutex_unlock(&tr->mutex);
> + return err;
> +}
> +
> +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
> +{
> + struct bpf_prog *shim_prog;
> + struct bpf_trampoline *tr;
> + bpf_func_t bpf_func;
> + u64 key;
> + int err;
> +
> + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> + prog->aux->attach_btf_id);
> +
> + err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> + if (err)
> + return;
> +
> + tr = bpf_trampoline_lookup(key);
> + if (!tr)
> + return;
> +
> + mutex_lock(&tr->mutex);
> +
> + shim_prog = cgroup_shim_find(tr, bpf_func);
> + if (shim_prog) {
> + /* We use shim_prog refcnt for tracking whether to
> + * remove the shim program from the trampoline.
> + * Trampoline's mutex is held while refcnt is
> + * added/subtracted so we don't need to care about
> + * potential races.
> + */
> +
> + if (atomic64_read(&shim_prog->aux->refcnt) == 1)
> + WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
> +
> + bpf_prog_put(shim_prog);
> + }
> +
> + mutex_unlock(&tr->mutex);
> +
> + bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
> +
> + if (shim_prog)
> + bpf_trampoline_put(tr);
Looking at bpf_trampoline_{link,unlink}_cgroup_shim() again: which
prog (the cgroup lsm progs or the shim_prog) actually owns the tr?
It should be the shim_prog, no?
How about storing tr with "shim_prog->aux->dst_trampoline = tr;"?
Then the earlier bpf_prog_put(shim_prog) in this function would take
care of the bpf_trampoline_put(shim_prog->aux->dst_trampoline),
instead of each cgroup lsm prog owning a refcnt on the tr, and this
individual bpf_trampoline_put(tr) here could also go away.
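i.e. (untested), when the shim is created:

	shim_prog->aux->dst_trampoline = tr; /* shim_prog owns the tr refcnt */

bpf_prog_free_deferred() already does
bpf_trampoline_put(aux->dst_trampoline), so dropping the last
shim_prog ref would drop the tr ref with it.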
> +}
> +#endif
> +
> struct bpf_trampoline *bpf_trampoline_get(u64 key,
> struct bpf_attach_target_info *tgt_info)
> {
> @@ -609,6 +752,24 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
> rcu_read_unlock();
> }
>
> +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog)
> + __acquires(RCU)
> +{
> + /* Runtime stats are exported via actual BPF_LSM_CGROUP
> + * programs, not the shims.
> + */
> + rcu_read_lock();
> + migrate_disable();
> + return NO_START_TIME;
> +}
> +
> +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start)
> + __releases(RCU)
> +{
> + migrate_enable();
> + rcu_read_unlock();
> +}
> +
> u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> {
> rcu_read_lock_trace();
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 813f6ee80419..99703d96c579 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -14455,6 +14455,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
> fallthrough;
> case BPF_MODIFY_RETURN:
> case BPF_LSM_MAC:
> + case BPF_LSM_CGROUP:
> case BPF_TRACE_FENTRY:
> case BPF_TRACE_FEXIT:
> if (!btf_type_is_func(t)) {
> @@ -14633,6 +14634,33 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
> return 0;
> }
>
> +static int check_used_maps(struct bpf_verifier_env *env)
> +{
> + int i;
> +
> + for (i = 0; i < env->used_map_cnt; i++) {
> + struct bpf_map *map = env->used_maps[i];
> +
> + switch (env->prog->expected_attach_type) {
> + case BPF_LSM_CGROUP:
> + if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
> + map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
> + break;
> +
> + if (map->key_size != sizeof(__u64)) {
Following up on my very last comment in v5: I think we should not
limit the bpf_cgroup_storage_key and should allow using both
cgroup_inode_id and attach_type as the key, to avoid the
inconsistent surprise of some keys not being allowed for a specific
attach type. This check_used_maps() function could then be avoided
as well.
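For reference, the existing key in the uapi already carries both:

	struct bpf_cgroup_storage_key {
		__u64	cgroup_inode_id;	/* cgroup inode id */
		__u32	attach_type;		/* program attach type */
	};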
> + verbose(env, "only global cgroup local storage is supported for BPF_LSM_CGROUP\n");
> + return -EINVAL;
> + }
> +
> + break;
> + default:
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> struct btf *bpf_get_btf_vmlinux(void)
> {
> if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
> @@ -14854,6 +14882,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
> convert_pseudo_ld_imm64(env);
> }
>
> + ret = check_used_maps(env);
> + if (ret < 0)
> + goto err_release_maps;
> +
> adjust_btf_func(env);
>
> err_release_maps:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 444fe6f1cf35..112e396bbe65 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -998,6 +998,7 @@ enum bpf_attach_type {
> BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
> BPF_PERF_EVENT,
> BPF_TRACE_KPROBE_MULTI,
> + BPF_LSM_CGROUP,
> __MAX_BPF_ATTACH_TYPE
> };
>
> --
> 2.36.0.464.gb9c8b46e94-goog
>