Message-ID: <20200429053948.gmyxmh57t7v6nayg@kafai-mbp>
Date: Tue, 28 Apr 2020 22:39:48 -0700
From: Martin KaFai Lau <kafai@...com>
To: Yonghong Song <yhs@...com>
CC: Andrii Nakryiko <andriin@...com>, <bpf@...r.kernel.org>,
<netdev@...r.kernel.org>, Alexei Starovoitov <ast@...com>,
Daniel Borkmann <daniel@...earbox.net>, <kernel-team@...com>
Subject: Re: [PATCH bpf-next v1 07/19] bpf: create anonymous bpf iterator
On Mon, Apr 27, 2020 at 01:12:42PM -0700, Yonghong Song wrote:
> A new bpf command BPF_ITER_CREATE is added.
>
> The anonymous bpf iterator is seq_file based.
> The seq_file private data are referenced by targets.
> The bpf_iter infrastructure allocates additional space
> at seq_file->private after the space used by targets
> to store some metadata, e.g.,
>   prog: prog to run
>   session_id: a unique id for each opened seq_file
>   seq_num: how many times bpf programs are queried in this session
>   has_last: indicates whether or not the bpf_prog has been called
>             after all valid objects have been processed
>
> A map between file and prog/link is established to help
> fops->release(). When fops->release() is called, the bpf program
> cannot be located from the inode and file alone, since the
> target's seq_priv_size is not available. This map helps retrieve
> the prog whose reference count needs to be decremented.
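IIUC, the layout of the seq_file private storage in this patch is roughly:

	priv_data (== seq->private)
	|
	v
	+-------------------------------+---------+------------------------+
	| target's private data         | padding | struct extra_priv_data |
	| (seq_priv_size bytes)         | (to 8B) |                        |
	+-------------------------------+---------+------------------------+

so the fixed-size extra_priv_data sits at a target-dependent offset, and
fops->release() cannot find it without knowing seq_priv_size.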
How about putting "struct extra_priv_data" at the beginning of
the seq_file's private storage instead, since its size is known?
seq->private could then point to an aligned byte right after
sizeof(struct extra_priv_data).
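
Something like this (a rough, untested sketch reusing the names from
this patch; the pointer fix-ups around __seq_open_private() /
seq_release_private() are only to illustrate the idea):

	static u32 get_total_priv_dsize(u32 seq_priv_size)
	{
		return roundup(sizeof(struct extra_priv_data), 8) + seq_priv_size;
	}

	static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
	{
		struct extra_priv_data *extra_data;
		struct seq_file *seq;
		...
		extra_data = __seq_open_private(file, tinfo->seq_ops,
						get_total_priv_dsize(tinfo->seq_priv_size));
		if (!extra_data)
			return -ENOMEM;

		/* targets keep seeing only their own private data */
		seq = file->private_data;
		seq->private = (void *)extra_data +
			       roundup(sizeof(struct extra_priv_data), 8);
		...
	}

	static int anon_iter_release(struct inode *inode, struct file *file)
	{
		struct seq_file *seq = file->private_data;
		struct extra_priv_data *extra_data;

		/* extra_data is at a fixed offset before seq->private,
		 * so neither seq_priv_size nor the <anon_file, prog>
		 * list is needed here.
		 */
		extra_data = seq->private -
			     roundup(sizeof(struct extra_priv_data), 8);
		bpf_prog_put(extra_data->prog);

		/* restore the pointer returned by __seq_open_private()
		 * so that seq_release_private() kfree()s the right address
		 */
		seq->private = extra_data;

		return seq_release_private(inode, file);
	}
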
[ ... ]
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index fc1ce5ee5c3f..1f4e778d1814 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
[ ... ]
> @@ -26,6 +40,50 @@ static bool bpf_iter_inited = false;
> /* protect bpf_iter_link.link->prog update */
> static struct mutex bpf_iter_mutex;
>
> +/* At anon seq_file release time, the prog cannot be retrieved
> + * because the target's seq_priv_size is not available.
> + * Keep a list of <anon_file, prog> mappings, so that
> + * at file release stage, the prog can be released properly.
> + */
> +static struct list_head anon_iter_info;
> +static struct mutex anon_iter_info_mutex;
> +
> +/* incremented on every opened seq_file */
> +static atomic64_t session_id;
> +
> +static u32 get_total_priv_dsize(u32 old_size)
> +{
> + return roundup(old_size, 8) + sizeof(struct extra_priv_data);
> +}
> +
> +static void *get_extra_priv_dptr(void *old_ptr, u32 old_size)
> +{
> + return old_ptr + roundup(old_size, 8);
> +}
> +
> +static int anon_iter_release(struct inode *inode, struct file *file)
> +{
> + struct anon_file_prog_assoc *finfo;
> +
> + mutex_lock(&anon_iter_info_mutex);
> + list_for_each_entry(finfo, &anon_iter_info, list) {
> + if (finfo->file == file) {
> + bpf_prog_put(finfo->prog);
> + list_del(&finfo->list);
> + kfree(finfo);
> + break;
> + }
> + }
> + mutex_unlock(&anon_iter_info_mutex);
> +
> + return seq_release_private(inode, file);
> +}
> +
> +static const struct file_operations anon_bpf_iter_fops = {
> + .read = seq_read,
> + .release = anon_iter_release,
> +};
> +
> int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
> {
> struct bpf_iter_target_info *tinfo;
[ ... ]
> @@ -150,3 +223,90 @@ int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
> mutex_unlock(&bpf_iter_mutex);
> return ret;
> }
> +
> +static void init_seq_file(void *priv_data, struct bpf_iter_target_info *tinfo,
> + struct bpf_prog *prog)
> +{
> + struct extra_priv_data *extra_data;
> +
> + if (tinfo->target_feature & BPF_DUMP_SEQ_NET_PRIVATE)
> + set_seq_net_private((struct seq_net_private *)priv_data,
> + current->nsproxy->net_ns);
> +
> + extra_data = get_extra_priv_dptr(priv_data, tinfo->seq_priv_size);
> + extra_data->session_id = atomic64_add_return(1, &session_id);
> + extra_data->prog = prog;
> + extra_data->seq_num = 0;
> + extra_data->has_last = false;
> +}
> +
> +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
> +{
> + struct anon_file_prog_assoc *finfo;
> + struct bpf_iter_target_info *tinfo;
> + struct bpf_prog *prog;
> + u32 total_priv_dsize;
> + void *priv_data;
> +
> + finfo = kmalloc(sizeof(*finfo), GFP_USER | __GFP_NOWARN);
> + if (!finfo)
> + return -ENOMEM;
> +
> + mutex_lock(&bpf_iter_mutex);
> + prog = link->link.prog;
> + bpf_prog_inc(prog);
> + mutex_unlock(&bpf_iter_mutex);
> +
> + tinfo = link->tinfo;
> + total_priv_dsize = get_total_priv_dsize(tinfo->seq_priv_size);
> + priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
> + if (!priv_data) {
> + bpf_prog_sub(prog, 1);
Could prog's refcnt go 0 here? If yes, bpf_prog_put() should be used.
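If it can, something like this on that error path (just a sketch,
keeping the rest of the cleanup as-is):

		if (!priv_data) {
			bpf_prog_put(prog);
			kfree(finfo);
			return -ENOMEM;
		}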
> + kfree(finfo);
> + return -ENOMEM;
> + }
> +
> + init_seq_file(priv_data, tinfo, prog);
> +
> + finfo->file = file;
> + finfo->prog = prog;
> +
> + mutex_lock(&anon_iter_info_mutex);
> + list_add(&finfo->list, &anon_iter_info);
> + mutex_unlock(&anon_iter_info_mutex);
> + return 0;
> +}
> +