[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190528185741.GB20578@tower.DHCP.thefacebook.com>
Date: Tue, 28 May 2019 18:57:45 +0000
From: Roman Gushchin <guro@...com>
To: Stanislav Fomichev <sdf@...gle.com>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>,
"davem@...emloft.net" <davem@...emloft.net>,
"ast@...nel.org" <ast@...nel.org>,
"daniel@...earbox.net" <daniel@...earbox.net>
Subject: Re: [PATCH bpf-next v3 3/4] bpf: cgroup: properly use bpf_prog_array api
On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote:
> Now that we don't have __rcu markers on the bpf_prog_array helpers,
> let's use a proper rcu_dereference_protected() to obtain the array
> pointer under cgroup_mutex.
>
> We also don't need __rcu annotations on cgroup_bpf.inactive since
> it's not read/updated concurrently.
>
> v3:
> * amend cgroup_rcu_dereference to include percpu_ref_is_dying;
> cgroup_bpf is now reference counted and we don't hold cgroup_mutex
> anymore in cgroup_bpf_release
>
> v2:
> * replace xchg with rcu_swap_protected
>
> Cc: Roman Gushchin <guro@...com>
> Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
> ---
> include/linux/bpf-cgroup.h | 2 +-
> kernel/bpf/cgroup.c | 32 +++++++++++++++++++++-----------
> 2 files changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 9f100fc422c3..b631ee75762d 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -72,7 +72,7 @@ struct cgroup_bpf {
> u32 flags[MAX_BPF_ATTACH_TYPE];
>
> /* temp storage for effective prog array used by prog_attach/detach */
> - struct bpf_prog_array __rcu *inactive;
> + struct bpf_prog_array *inactive;
>
> /* reference counter used to detach bpf programs after cgroup removal */
> struct percpu_ref refcnt;
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index d995edbe816d..118b70175dd9 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -22,6 +22,13 @@
> DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
> EXPORT_SYMBOL(cgroup_bpf_enabled_key);
>
> +#define cgroup_rcu_dereference(cgrp, p) \
> + rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) || \
> + percpu_ref_is_dying(&cgrp->bpf.refcnt))
> +
> +#define cgroup_rcu_swap(rcu_ptr, ptr) \
> + rcu_swap_protected(rcu_ptr, ptr, lockdep_is_held(&cgroup_mutex))
> +
> void cgroup_bpf_offline(struct cgroup *cgrp)
> {
> cgroup_get(cgrp);
> @@ -38,6 +45,7 @@ static void cgroup_bpf_release(struct work_struct *work)
> struct cgroup *cgrp = container_of(work, struct cgroup,
> bpf.release_work);
> enum bpf_cgroup_storage_type stype;
> + struct bpf_prog_array *old_array;
> unsigned int type;
>
> for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
> @@ -54,7 +62,9 @@ static void cgroup_bpf_release(struct work_struct *work)
> kfree(pl);
> static_branch_dec(&cgroup_bpf_enabled_key);
> }
> - bpf_prog_array_free(cgrp->bpf.effective[type]);
> + old_array = cgroup_rcu_dereference(cgrp,
> + cgrp->bpf.effective[type]);
> + bpf_prog_array_free(old_array);
> }
>
> percpu_ref_exit(&cgrp->bpf.refcnt);
> @@ -126,7 +136,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
> */
> static int compute_effective_progs(struct cgroup *cgrp,
> enum bpf_attach_type type,
> - struct bpf_prog_array __rcu **array)
> + struct bpf_prog_array **array)
> {
> enum bpf_cgroup_storage_type stype;
> struct bpf_prog_array *progs;
> @@ -164,17 +174,15 @@ static int compute_effective_progs(struct cgroup *cgrp,
> }
> } while ((p = cgroup_parent(p)));
>
> - rcu_assign_pointer(*array, progs);
> + *array = progs;
> return 0;
> }
>
> static void activate_effective_progs(struct cgroup *cgrp,
> enum bpf_attach_type type,
> - struct bpf_prog_array __rcu *array)
> + struct bpf_prog_array *old_array)
> {
> - struct bpf_prog_array __rcu *old_array;
> -
> - old_array = xchg(&cgrp->bpf.effective[type], array);
> + cgroup_rcu_swap(cgrp->bpf.effective[type], old_array);
> /* free prog array after grace period, since __cgroup_bpf_run_*()
> * might be still walking the array
> */
> @@ -191,7 +199,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
> * that array below is variable length
> */
> #define NR ARRAY_SIZE(cgrp->bpf.effective)
> - struct bpf_prog_array __rcu *arrays[NR] = {};
> + struct bpf_prog_array *arrays[NR] = {};
> int ret, i;
>
> ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
> @@ -477,10 +485,13 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
> enum bpf_attach_type type = attr->query.attach_type;
> struct list_head *progs = &cgrp->bpf.progs[type];
> u32 flags = cgrp->bpf.flags[type];
> + struct bpf_prog_array *effective;
> int cnt, ret = 0, i;
>
> + effective = cgroup_rcu_dereference(cgrp, cgrp->bpf.effective[type]);
> +
> if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
> - cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
> + cnt = bpf_prog_array_length(effective);
> else
> cnt = prog_list_length(progs);
>
> @@ -497,8 +508,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
> }
>
> if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
> - return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
> - prog_ids, cnt);
> + return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
> } else {
> struct bpf_prog_list *pl;
> u32 id;
> --
> 2.22.0.rc1.257.g3120a18244-goog
>
Acked-by: Roman Gushchin <guro@...com>
Thanks!
Powered by blists - more mailing lists