Message-ID: <20220521065614.w7jqj4xg2skfg73u@kafai-mbp>
Date: Fri, 20 May 2022 23:56:14 -0700
From: Martin KaFai Lau <kafai@...com>
To: Stanislav Fomichev <sdf@...gle.com>
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org, ast@...nel.org,
daniel@...earbox.net, andrii@...nel.org
Subject: Re: [PATCH bpf-next v7 04/11] bpf: minimize number of allocated lsm
slots per program
On Wed, May 18, 2022 at 03:55:24PM -0700, Stanislav Fomichev wrote:
> Previous patch adds 1:1 mapping between all 211 LSM hooks
> and bpf_cgroup program array. Instead of reserving a slot per
> possible hook, reserve 10 slots per cgroup for lsm programs.
> Those slots are dynamically allocated on demand and reclaimed.
>
> struct cgroup_bpf {
> struct bpf_prog_array * effective[33]; /* 0 264 */
> /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
> struct hlist_head progs[33]; /* 264 264 */
> /* --- cacheline 8 boundary (512 bytes) was 16 bytes ago --- */
> u8 flags[33]; /* 528 33 */
>
> /* XXX 7 bytes hole, try to pack */
>
> struct list_head storages; /* 568 16 */
> /* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */
> struct bpf_prog_array * inactive; /* 584 8 */
> struct percpu_ref refcnt; /* 592 16 */
> struct work_struct release_work; /* 608 72 */
>
> /* size: 680, cachelines: 11, members: 7 */
> /* sum members: 673, holes: 1, sum holes: 7 */
> /* last cacheline: 40 bytes */
> };
>
> Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
> ---
> include/linux/bpf-cgroup-defs.h | 3 +-
> include/linux/bpf_lsm.h | 6 --
> kernel/bpf/bpf_lsm.c | 5 --
> kernel/bpf/cgroup.c | 135 +++++++++++++++++++++++++++++---
> 4 files changed, 125 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
> index d5a70a35dace..359d3f16abea 100644
> --- a/include/linux/bpf-cgroup-defs.h
> +++ b/include/linux/bpf-cgroup-defs.h
> @@ -10,7 +10,8 @@
>
> struct bpf_prog_array;
>
> -#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */
> +/* Maximum number of concurrently attachable per-cgroup LSM hooks. */
> +#define CGROUP_LSM_NUM 10
>
> enum cgroup_bpf_attach_type {
> CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
> diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> index 7f0e59f5f9be..613de44aa429 100644
> --- a/include/linux/bpf_lsm.h
> +++ b/include/linux/bpf_lsm.h
> @@ -43,7 +43,6 @@ extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
> void bpf_inode_storage_free(struct inode *inode);
>
> int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
> -int bpf_lsm_hook_idx(u32 btf_id);
>
> #else /* !CONFIG_BPF_LSM */
>
> @@ -74,11 +73,6 @@ static inline int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> return -ENOENT;
> }
>
> -static inline int bpf_lsm_hook_idx(u32 btf_id)
> -{
> - return -EINVAL;
> -}
> -
> #endif /* CONFIG_BPF_LSM */
>
> #endif /* _LINUX_BPF_LSM_H */
> diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
> index 654c23577ad3..96503c3e7a71 100644
> --- a/kernel/bpf/bpf_lsm.c
> +++ b/kernel/bpf/bpf_lsm.c
> @@ -71,11 +71,6 @@ int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> return 0;
> }
>
> -int bpf_lsm_hook_idx(u32 btf_id)
> -{
> - return btf_id_set_index(&bpf_lsm_hooks, btf_id);
> -}
> -
> int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
> const struct bpf_prog *prog)
> {
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 2c356a38f4cf..a959cdd22870 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -132,15 +132,110 @@ unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
> }
>
> #ifdef CONFIG_BPF_LSM
> +struct list_head unused_bpf_lsm_atypes;
> +struct list_head used_bpf_lsm_atypes;
> +
> +struct bpf_lsm_attach_type {
> + int index;
> + u32 btf_id;
> + int usecnt;
> + struct list_head atypes;
> + struct rcu_head rcu_head;
> +};
> +
> +static int __init bpf_lsm_attach_type_init(void)
> +{
> + struct bpf_lsm_attach_type *atype;
> + int i;
> +
> + INIT_LIST_HEAD_RCU(&unused_bpf_lsm_atypes);
> + INIT_LIST_HEAD_RCU(&used_bpf_lsm_atypes);
> +
> + for (i = 0; i < CGROUP_LSM_NUM; i++) {
> + atype = kzalloc(sizeof(*atype), GFP_KERNEL);
> + if (!atype)
> + continue;
> +
> + atype->index = i;
> + list_add_tail_rcu(&atype->atypes, &unused_bpf_lsm_atypes);
> + }
> +
> + return 0;
> +}
> +late_initcall(bpf_lsm_attach_type_init);
> +
> static enum cgroup_bpf_attach_type bpf_lsm_attach_type_get(u32 attach_btf_id)
> {
> - return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id);
> + struct bpf_lsm_attach_type *atype;
> +
> + lockdep_assert_held(&cgroup_mutex);
> +
> + list_for_each_entry_rcu(atype, &used_bpf_lsm_atypes, atypes) {
> + if (atype->btf_id != attach_btf_id)
> + continue;
> +
> + atype->usecnt++;
> + return CGROUP_LSM_START + atype->index;
> + }
> +
> + atype = list_first_or_null_rcu(&unused_bpf_lsm_atypes, struct bpf_lsm_attach_type, atypes);
> + if (!atype)
> + return -E2BIG;
> +
> + list_del_rcu(&atype->atypes);
> + atype->btf_id = attach_btf_id;
> + atype->usecnt = 1;
> + list_add_tail_rcu(&atype->atypes, &used_bpf_lsm_atypes);
> +
> + return CGROUP_LSM_START + atype->index;
> +}
> +
> +static void bpf_lsm_attach_type_reclaim(struct rcu_head *head)
> +{
> + struct bpf_lsm_attach_type *atype =
> + container_of(head, struct bpf_lsm_attach_type, rcu_head);
> +
> + atype->btf_id = 0;
> + atype->usecnt = 0;
> + list_add_tail_rcu(&atype->atypes, &unused_bpf_lsm_atypes);
hmm...... is it ok to change the unused_bpf_lsm_atypes list here without
holding the cgroup_mutex?  Since this is an rcu callback, the cgroup_mutex
cannot be taken here anyway, so a spinlock would be needed.
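Something like this is what I have in mind (untested sketch; the
bpf_lsm_atype_lock below is a new lock, and the other writers of these
two lists would need to take it with spin_lock_bh() as well):

/* untested sketch: a dedicated lock for the atype lists, since the
 * cgroup_mutex cannot be taken from an rcu callback
 */
static DEFINE_SPINLOCK(bpf_lsm_atype_lock);

static void bpf_lsm_attach_type_reclaim(struct rcu_head *head)
{
	struct bpf_lsm_attach_type *atype =
		container_of(head, struct bpf_lsm_attach_type, rcu_head);

	atype->btf_id = 0;
	atype->usecnt = 0;

	/* rcu callbacks may run in softirq context, so only a
	 * non-sleeping lock can be used here
	 */
	spin_lock_bh(&bpf_lsm_atype_lock);
	list_add_tail_rcu(&atype->atypes, &unused_bpf_lsm_atypes);
	spin_unlock_bh(&bpf_lsm_atype_lock);
}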
> +}
> +
> +static void bpf_lsm_attach_type_put(u32 attach_btf_id)
> +{
> + struct bpf_lsm_attach_type *atype;
> +
> + lockdep_assert_held(&cgroup_mutex);
> +
> + list_for_each_entry_rcu(atype, &used_bpf_lsm_atypes, atypes) {
> + if (atype->btf_id != attach_btf_id)
> + continue;
> +
> + if (--atype->usecnt <= 0) {
> + list_del_rcu(&atype->atypes);
> + WARN_ON_ONCE(atype->usecnt < 0);
> +
> + /* call_rcu here prevents atype reuse within
> + * the same rcu grace period.
> + * shim programs use __bpf_prog_enter_lsm_cgroup
> + * which starts RCU read section.
It is a bit unclear to me why there is no need to assign
'shim_prog->aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID' here
before reclaiming the atype, and why shim_prog->bpf_func does not need
to check shim_prog->aux->cgroup_atype before using it.
A few words of comment here explaining this would be very useful.
> + */
> + call_rcu(&atype->rcu_head, bpf_lsm_attach_type_reclaim);
How about doing this bpf_lsm_attach_type_put() in bpf_prog_free_deferred(),
and only for the shim_prog, not for the cgroup-lsm prog?  The shim_prog is
the only one that needs the cgroup_atype, so the cgroup_atype can naturally
be reused once the shim_prog is destroyed.  bpf_prog_free_deferred() has
already gone through a rcu grace period (__bpf_prog_put_rcu) and it can
block, so the cgroup_mutex can be used there.  The need for the rcu_head
here should then go away as well, and the v6 array approach could be
reconsidered.
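
Roughly what I have in mind (untested; bpf_cgroup_atype_put() is just a
placeholder name, and it assumes the cgroup_mutex is reachable from the
bpf_prog_free_deferred() path, otherwise a dedicated mutex would do):

/* placeholder helper; only the shim_prog has a valid aux->cgroup_atype,
 * so this is a nop for every other prog
 */
void bpf_cgroup_atype_put(struct bpf_prog *prog)
{
	if (prog->aux->cgroup_atype == CGROUP_BPF_ATTACH_TYPE_INVALID)
		return;

	/* bpf_prog_free_deferred() can block and has already gone
	 * through a rcu grace period (__bpf_prog_put_rcu), so the
	 * atype index can be handed out again right away
	 */
	mutex_lock(&cgroup_mutex);
	bpf_lsm_attach_type_put(prog->aux->attach_btf_id);
	mutex_unlock(&cgroup_mutex);
}

and then a single bpf_cgroup_atype_put(aux->prog) call near the end of
bpf_prog_free_deferred().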
The cgroup-lsm prog does not necessarily need to hold a usecnt on the
cgroup_atype; its aux->cgroup_atype can stay CGROUP_BPF_ATTACH_TYPE_INVALID.
My understanding is that setting replace_prog->aux->cgroup_atype during
attach is only an optimization; it can always be looked up again.