[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20201104221526.dv6qfpfp5lk2t7zw@kafai-mbp.dhcp.thefacebook.com>
Date: Wed, 4 Nov 2020 14:15:35 -0800
From: Martin KaFai Lau <kafai@...com>
To: KP Singh <kpsingh@...omium.org>
CC: <linux-kernel@...r.kernel.org>, <bpf@...r.kernel.org>,
Song Liu <songliubraving@...com>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Paul Turner <pjt@...gle.com>, Jann Horn <jannh@...gle.com>,
Hao Luo <haoluo@...gle.com>
Subject: Re: [PATCH bpf-next v3 1/9] bpf: Implement task local storage
On Wed, Nov 04, 2020 at 05:44:45PM +0100, KP Singh wrote:
[ ... ]
> +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
> +{
> + struct bpf_local_storage_data *sdata;
> + struct task_struct *task;
> + unsigned int f_flags;
> + struct pid *pid;
> + int fd, err;
> +
> + fd = *(int *)key;
> + pid = pidfd_get_pid(fd, &f_flags);
> + if (IS_ERR(pid))
> + return ERR_CAST(pid);
> +
> + /* We should be in an RCU read side critical section, it should be safe
> + * to call pid_task.
> + */
> + WARN_ON_ONCE(!rcu_read_lock_held());
> + task = pid_task(pid, PIDTYPE_PID);
> + if (!task) {
> + err = -ENOENT;
> + goto out;
> + }
> +
> + sdata = task_storage_lookup(task, map, true);
> + put_pid(pid);
> + return sdata ? sdata->data : NULL;
> +out:
> + put_pid(pid);
> + return ERR_PTR(err);
> +}
> +
> +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
> + void *value, u64 map_flags)
> +{
> + struct bpf_local_storage_data *sdata;
> + struct task_struct *task;
> + unsigned int f_flags;
> + struct pid *pid;
> + int fd, err;
> +
> + fd = *(int *)key;
> + pid = pidfd_get_pid(fd, &f_flags);
> + if (IS_ERR(pid))
> + return PTR_ERR(pid);
> +
> + /* We should be in an RCU read side critical section, it should be safe
> + * to call pid_task.
> + */
> + WARN_ON_ONCE(!rcu_read_lock_held());
> + task = pid_task(pid, PIDTYPE_PID);
> + if (!task) {
> + err = -ENOENT;
> + goto out;
> + }
> +
> + sdata = bpf_local_storage_update(
> + task, (struct bpf_local_storage_map *)map, value, map_flags);
It seems the task is protected by RCU here and the task may be going away.
Is that OK?
Or is the following comment in the later "BPF_CALL_4(bpf_task_storage_get, ...)"
no longer valid?
/* This helper must only called from where the task is guaranteed
* to have a refcount and cannot be freed.
*/
> +
> + err = PTR_ERR_OR_ZERO(sdata);
> +out:
> + put_pid(pid);
> + return err;
> +}
> +
[ ... ]
> +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
> + task, void *, value, u64, flags)
> +{
> + struct bpf_local_storage_data *sdata;
> +
> + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
> + return (unsigned long)NULL;
> +
> + /* explicitly check that the task_storage_ptr is not
> + * NULL as task_storage_lookup returns NULL in this case and
> + * bpf_local_storage_update expects the owner to have a
> + * valid storage pointer.
> + */
> + if (!task_storage_ptr(task))
> + return (unsigned long)NULL;
> +
> + sdata = task_storage_lookup(task, map, true);
> + if (sdata)
> + return (unsigned long)sdata->data;
> +
> + /* This helper must only called from where the task is guaranteed
> + * to have a refcount and cannot be freed.
> + */
> + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
> + sdata = bpf_local_storage_update(
> + task, (struct bpf_local_storage_map *)map, value,
> + BPF_NOEXIST);
> + return IS_ERR(sdata) ? (unsigned long)NULL :
> + (unsigned long)sdata->data;
> + }
> +
> + return (unsigned long)NULL;
> +}
[ ... ]
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 8f50c9c19f1b..f3fe9f53f93c 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
> map->map_type != BPF_MAP_TYPE_ARRAY &&
> map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
> map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
> - map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
> + map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
> + map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
This is to enable spin lock support in a map's value. Without peeking at
patch 5, I was a bit confused here. It seems patch 5 was missed when
inode storage was added.
> return -ENOTSUPP;
> if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
> map->value_size) {
Powered by blists - more mailing lists