netdev - Re: [PATCH bpf-next v8 03/11] bpf: per-cgroup lsm flavor

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220604061154.kefsehmyrnwgxstk@kafai-mbp>
Date:   Fri, 3 Jun 2022 23:11:54 -0700
From:   Martin KaFai Lau <kafai@...com>
To:     Stanislav Fomichev <sdf@...gle.com>
Cc:     netdev@...r.kernel.org, bpf@...r.kernel.org, ast@...nel.org,
        daniel@...earbox.net, andrii@...nel.org
Subject: Re: [PATCH bpf-next v8 03/11] bpf: per-cgroup lsm flavor

On Wed, Jun 01, 2022 at 12:02:10PM -0700, Stanislav Fomichev wrote:
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 4adb4f3ecb7f..66b644a76a69 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -14,6 +14,9 @@
>  #include <linux/string.h>
>  #include <linux/bpf.h>
>  #include <linux/bpf-cgroup.h>
> +#include <linux/btf_ids.h>
This should not be needed ?

> +#include <linux/bpf_lsm.h>
> +#include <linux/bpf_verifier.h>
>  #include <net/sock.h>
>  #include <net/bpf_sk_storage.h>
>  
> @@ -61,6 +64,88 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
>  	return run_ctx.retval;
>  }
>  
> +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
> +				       const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct sock *sk;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +	u64 *args;
> +
> +	args = (u64 *)ctx;
> +	sk = (void *)(unsigned long)args[0];
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
> +					 const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct socket *sock;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +	u64 *args;
> +
> +	args = (u64 *)ctx;
> +	sock = (void *)(unsigned long)args[0];
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
> +					  const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	rcu_read_lock();
> +	cgrp = task_dfl_cgroup(current);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +#ifdef CONFIG_BPF_LSM
> +static enum cgroup_bpf_attach_type
> +bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
> +{
> +	if (attach_type != BPF_LSM_CGROUP)
> +		return to_cgroup_bpf_attach_type(attach_type);
> +	return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id);
> +}
> +#else
> +static enum cgroup_bpf_attach_type
> +bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
> +{
> +	if (attach_type != BPF_LSM_CGROUP)
> +		return to_cgroup_bpf_attach_type(attach_type);
> +	return -EOPNOTSUPP;
> +}
> +#endif /* CONFIG_BPF_LSM */
> +
>  void cgroup_bpf_offline(struct cgroup *cgrp)
>  {
>  	cgroup_get(cgrp);
> @@ -163,10 +248,16 @@ static void cgroup_bpf_release(struct work_struct *work)
>  
>  		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
>  			hlist_del(&pl->node);
> -			if (pl->prog)
> +			if (pl->prog) {
> +				if (atype >= CGROUP_LSM_START && atype <= CGROUP_LSM_END)
nit.  Check expected_attach_type == BPF_LSM_CGROUP like
other places ?

> +					bpf_trampoline_unlink_cgroup_shim(pl->prog);
>  				bpf_prog_put(pl->prog);
> -			if (pl->link)
> +			}
> +			if (pl->link) {
> +				if (atype >= CGROUP_LSM_START && atype <= CGROUP_LSM_END)
Same here.

> +					bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
>  				bpf_cgroup_link_auto_detach(pl->link);
> +			}
>  			kfree(pl);
>  			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
>  		}
> @@ -479,6 +570,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	struct bpf_prog *old_prog = NULL;
>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
>  	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> +	struct bpf_prog *new_prog = prog ? : link->link.prog;
> +	struct bpf_attach_target_info tgt_info = {};
>  	enum cgroup_bpf_attach_type atype;
>  	struct bpf_prog_list *pl;
>  	struct hlist_head *progs;
> @@ -495,7 +588,20 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  		/* replace_prog implies BPF_F_REPLACE, and vice versa */
>  		return -EINVAL;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> +	if (type == BPF_LSM_CGROUP) {
> +		if (replace_prog) {
> +			if (replace_prog->aux->attach_btf_id !=
> +			    new_prog->aux->attach_btf_id)
> +				return -EINVAL;
> +		}
This is no longer needed if it does not reuse the
replace_prog's cgroup_atype.

If old_prog is not NULL, it must have the same attach_btf_id.

This should still be true even bpf_cgroup_atype_find() return
a new cgroup_atype.  In that case, 'pl = find_attach_entry();'
must be NULL.

WDYT?

> +		err = bpf_check_attach_target(NULL, new_prog, NULL,
> +					      new_prog->aux->attach_btf_id,
> +					      &tgt_info);
If the above attach_btf_id check in unnecessary,
this can be done in bpf_trampoline_link_cgroup_shim() where
it is the only place that tgt_info will be used.
This should never fail also because this had been done
once during the prog load time, so doing it later is fine.

Then this whole "if (type == BPF_LSM_CGROUP)" case can be removed
from __cgroup_bpf_attach().

> +		if (err)
> +			return -EINVAL;
> +	}
> +
> +	atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
>  	if (atype < 0)
>  		return -EINVAL;
>  
> @@ -549,9 +655,15 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	bpf_cgroup_storages_assign(pl->storage, storage);
>  	cgrp->bpf.flags[atype] = saved_flags;
>  
> +	if (type == BPF_LSM_CGROUP && !old_prog) {
hmm... I think this "!old_prog" test should not be here.

In allow_multi, old_prog can be NULL but it still needs
to bump the shim_link's refcnt by calling
bpf_trampoline_link_cgroup_shim().

This is a bit tricky.  Does it make sense ?

> +		err = bpf_trampoline_link_cgroup_shim(new_prog, &tgt_info, atype);
> +		if (err)
> +			goto cleanup;
> +	}
> +
>  	err = update_effective_progs(cgrp, atype);
>  	if (err)
> -		goto cleanup;
> +		goto cleanup_trampoline;
>  
>  	if (old_prog)
Then it needs a bpf_trampoline_unlink_cgroup_shim(old_prog) here.

>  		bpf_prog_put(old_prog);
> @@ -560,6 +672,10 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	bpf_cgroup_storages_link(new_storage, cgrp, type);
>  	return 0;
>  
> +cleanup_trampoline:
> +	if (type == BPF_LSM_CGROUP && !old_prog)
The "!old_prog" test should also be removed.

> +		bpf_trampoline_unlink_cgroup_shim(new_prog);
> +
>  cleanup:
>  	if (old_prog) {
>  		pl->prog = old_prog;
> @@ -651,7 +767,13 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
>  	struct hlist_head *progs;
>  	bool found = false;
>  
> -	atype = to_cgroup_bpf_attach_type(link->type);
> +	if (link->type == BPF_LSM_CGROUP) {
> +		if (new_prog->aux->attach_btf_id !=
> +		    link->link.prog->aux->attach_btf_id)
> +			return -EINVAL;
This should be no longer needed also.
It will return -ENOENT later.

> +	}
> +
> +	atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
>  	if (atype < 0)
>  		return -EINVAL;
>  
> @@ -803,9 +925,15 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>  	struct bpf_prog *old_prog;
>  	struct bpf_prog_list *pl;
>  	struct hlist_head *progs;
> +	u32 attach_btf_id = 0;
>  	u32 flags;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> +	if (prog)
> +		attach_btf_id = prog->aux->attach_btf_id;
> +	if (link)
> +		attach_btf_id = link->link.prog->aux->attach_btf_id;
> +
> +	atype = bpf_cgroup_atype_find(type, attach_btf_id);
>  	if (atype < 0)
>  		return -EINVAL;
>  
> @@ -832,6 +960,10 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>  		purge_effective_progs(cgrp, old_prog, link, atype);
>  	}
>  
> +
> +	if (type == BPF_LSM_CGROUP)
> +		bpf_trampoline_unlink_cgroup_shim(old_prog ? : link->link.prog);
For the '!old_prog' case (link case), do the
bpf_trampoline_unlink_cgroup_shim() in bpf_cgroup_link_release().

For the old_prog case (non-link case),
the bpf_trampoline_unlink_cgroup_shim() can be done
under the same 'if (old_prog)' check a few lines below.

Then for both cases shim unlink will be closer to where bpf_prog_put()
will be done and have similar pattern as in __cgroup_bpf_attach() and
cgroup_bpf_release(), so easier to reason in the future:
shim unlink and then prog put.

[ ... ]

> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 5466e15be61f..45dfcece76e7 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -11,6 +11,8 @@
>  #include <linux/rcupdate_wait.h>
>  #include <linux/module.h>
>  #include <linux/static_call.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf_lsm.h>
>  
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -496,6 +498,169 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
>  	return err;
>  }
>  
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> +static void bpf_shim_tramp_link_release(struct bpf_link *link)
> +{
> +	struct bpf_shim_tramp_link *shim_link =
> +		container_of(link, struct bpf_shim_tramp_link, link.link);
> +
> +	if (!shim_link->trampoline)
> +		return;
> +
> +	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
> +	bpf_trampoline_put(shim_link->trampoline);
> +}
> +
> +static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
> +{
> +	struct bpf_shim_tramp_link *shim_link =
> +		container_of(link, struct bpf_shim_tramp_link, link.link);
> +
> +	kfree(shim_link);
> +}
> +
> +static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
> +	.release = bpf_shim_tramp_link_release,
> +	.dealloc = bpf_shim_tramp_link_dealloc,
> +};
> +
> +static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
> +						     bpf_func_t bpf_func,
> +						     int cgroup_atype)
> +{
> +	struct bpf_shim_tramp_link *shim_link = NULL;
> +	struct bpf_prog *p;
> +
> +	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
> +	if (!shim_link)
> +		return NULL;
> +
> +	p = bpf_prog_alloc(1, 0);
> +	if (!p) {
> +		kfree(shim_link);
> +		return NULL;
> +	}
> +
> +	p->jited = false;
> +	p->bpf_func = bpf_func;
> +
> +	p->aux->cgroup_atype = cgroup_atype;
> +	p->aux->attach_func_proto = prog->aux->attach_func_proto;
> +	p->aux->attach_btf_id = prog->aux->attach_btf_id;
> +	p->aux->attach_btf = prog->aux->attach_btf;
> +	btf_get(p->aux->attach_btf);
> +	p->type = BPF_PROG_TYPE_LSM;
> +	p->expected_attach_type = BPF_LSM_MAC;
> +	bpf_prog_inc(p);
> +	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
> +		      &bpf_shim_tramp_link_lops, p);
> +
> +	return shim_link;
> +}
> +
> +static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
> +						    bpf_func_t bpf_func)
> +{
> +	struct bpf_tramp_link *link;
> +	int kind;
> +
> +	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> +		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
> +			struct bpf_prog *p = link->link.prog;
> +
> +			if (p->bpf_func == bpf_func)
> +				return container_of(link, struct bpf_shim_tramp_link, link);
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> +				    struct bpf_attach_target_info *tgt_info,
> +				    int cgroup_atype)
> +{
> +	struct bpf_shim_tramp_link *shim_link = NULL;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
Directly get tgt_info here instead of doing it
in __cgroup_bpf_attach() and then passing in.
This is the only place needed it.

	err = bpf_check_attach_target(NULL, prog, NULL,
				prog->aux->attach_btf_id,
				&tgt_info);
	if (err)
		return err;

> +
> +	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	tr = bpf_trampoline_get(key, tgt_info);
> +	if (!tr)
> +		return  -ENOMEM;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_link = cgroup_shim_find(tr, bpf_func);
> +	if (shim_link) {
> +		/* Reusing existing shim attached by the other program. */
> +		atomic64_inc(&shim_link->link.link.refcnt);
Use bpf_link_inc() instead to pair with bpf_link_put().

> +		/* note, we're still holding tr refcnt from above */
It has to do a bpf_trampoline_put(tr) after mutex_unlock(&tr->mutex).
shim_link already holds one refcnt to tr.

> +
> +		mutex_unlock(&tr->mutex);
> +		return 0;
> +	}
> +
> +	/* Allocate and install new shim. */
> +
> +	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
> +	if (!shim_link) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
> +	if (err)
> +		goto out;
> +
> +	shim_link->trampoline = tr;
> +	/* note, we're still holding tr refcnt from above */
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	return 0;
> +out:
> +	mutex_unlock(&tr->mutex);
> +
> +	if (shim_link)
> +		bpf_link_put(&shim_link->link.link);
> +
> +	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
Doing it here is because mutex_unlock(&tr->mutex) has
to be done first?  A comment will be useful.

How about passing tr to cgroup_shim_alloc(..., tr)
which is initializing everything else in shim_link anyway.
Then the 'if (!shim_link->trampoline)' in bpf_shim_tramp_link_release()
can go away also.

Like:

static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     struct bpf_trampoline *tr)

{
	/* ... */
	shim_link->trampoline = tr;

	return shim_link;
}

int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype)
{

	/* ... */

	/* Allocate and install new shim. */

	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype, tr);
	if (!shim_link) {
		err = -ENOMEM;
		goto error;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
	if (err)
		goto error;

	mutex_unlock(&tr->mutex);

	return 0;

error:
	mutex_unlock(&tr->mutex);
	if (shim_link)
		bpf_link_put(&shim_link->link.link);
	else
		bpf_trampoline_put(tr);

	return err;
}

[ ... ]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aedac2ac02b9..caa5740b39b3 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -7264,6 +7264,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>  				reg_type_str(env, regs[BPF_REG_1].type));
>  			return -EACCES;
>  		}
> +		break;
> +	case BPF_FUNC_set_retval:
> +		if (env->prog->expected_attach_type == BPF_LSM_CGROUP) {
> +			if (!env->prog->aux->attach_func_proto->type) {
> +				/* Make sure programs that attach to void
> +				 * hooks don't try to modify return value.
> +				 */
> +				err = -EINVAL;
nit. Directly 'return -EINVAL;' after verbose() logging.

> +				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
> +			}
> +		}
> +		break;
>  	}
>  
>  	if (err)
> @@ -10474,6 +10486,23 @@ static int check_return_code(struct bpf_verifier_env *env)
>  	case BPF_PROG_TYPE_SK_LOOKUP:
>  		range = tnum_range(SK_DROP, SK_PASS);
>  		break;
> +
> +	case BPF_PROG_TYPE_LSM:
> +		if (env->prog->expected_attach_type == BPF_LSM_CGROUP) {
> +			if (!env->prog->aux->attach_func_proto->type) {
nit. Check 'if ( ... != BPF_LSM_CGROUP) return 0;' first to remove
one level of indentation.

> +				/* Make sure programs that attach to void
> +				 * hooks don't try to modify return value.
> +				 */
> +				range = tnum_range(1, 1);
> +			}
> +		} else {
> +			/* regular BPF_PROG_TYPE_LSM programs can return
> +			 * any value.
> +			 */
> +			return 0;
> +		}
> +		break;
> +
>  	case BPF_PROG_TYPE_EXT:
>  		/* freplace program can return anything as its return value
>  		 * depends on the to-be-replaced kernel func or bpf program.
> @@ -10490,6 +10519,8 @@ static int check_return_code(struct bpf_verifier_env *env)
>  
>  	if (!tnum_in(range, reg->var_off)) {
>  		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
> +		if (env->prog->expected_attach_type == BPF_LSM_CGROUP)
> +			verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
This is not accurate to verbose on void-return only.
For int-return lsm look, the BPF_LSM_CGROUP prog returning
neither 0 nor 1 will also trigger this range check failure.

>  		return -EINVAL;
>  	}
>  
> @@ -14713,6 +14744,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
>  		fallthrough;
>  	case BPF_MODIFY_RETURN:
>  	case BPF_LSM_MAC:
> +	case BPF_LSM_CGROUP:
>  	case BPF_TRACE_FENTRY:
>  	case BPF_TRACE_FEXIT:
>  		if (!btf_type_is_func(t)) {