[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <377d5ad0-cf4f-4c8b-c23d-ed37dce4ad9f@fb.com>
Date: Wed, 18 Dec 2019 19:10:38 +0000
From: Yonghong Song <yhs@...com>
To: Paul Chaignon <paul.chaignon@...nge.com>,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>
CC: "paul.chaignon@...il.com" <paul.chaignon@...il.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Martin Lau <kafai@...com>, Song Liu <songliubraving@...com>,
Andrii Nakryiko <andriin@...com>
Subject: Re: [PATCH bpf-next 1/3] bpf: Single-cpu updates for per-cpu maps
On 12/18/19 6:23 AM, Paul Chaignon wrote:
> Currently, userspace programs have to update the values of all CPUs at
> once when updating per-cpu maps. This limitation prevents the update of
> a single CPU's value without the risk of missing concurrent updates on
> other CPU's values.
>
> This patch allows userspace to update the value of a specific CPU in
> per-cpu maps. The CPU whose value should be updated is encoded in the
> 32 upper-bits of the flags argument, as follows. The new BPF_CPU flag
> can be combined with existing flags.
>
> bpf_map_update_elem(..., cpuid << 32 | BPF_CPU)
Some additional comments beyond Alexei's.
>
> Signed-off-by: Paul Chaignon <paul.chaignon@...nge.com>
> ---
> include/uapi/linux/bpf.h | 4 +++
> kernel/bpf/arraymap.c | 19 ++++++++-----
> kernel/bpf/hashtab.c | 49 ++++++++++++++++++++--------------
> kernel/bpf/local_storage.c | 16 +++++++----
> kernel/bpf/syscall.c | 17 +++++++++---
> tools/include/uapi/linux/bpf.h | 4 +++
> 6 files changed, 74 insertions(+), 35 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index dbbcf0b02970..2efb17d2c77a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -316,6 +316,10 @@ enum bpf_attach_type {
> #define BPF_NOEXIST 1 /* create new element if it didn't exist */
> #define BPF_EXIST 2 /* update existing element */
> #define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
> +#define BPF_CPU 8 /* single-cpu update for per-cpu maps */
> +
> +/* CPU mask for single-cpu updates */
> +#define BPF_CPU_MASK 0xFFFFFFFF00000000ULL
BPF_F_CPU_MASK?
>
> /* flags for BPF_MAP_CREATE command */
> #define BPF_F_NO_PREALLOC (1U << 0)
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index f0d19bbb9211..a96e94696819 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -302,7 +302,8 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
> u32 index = *(u32 *)key;
> char *val;
>
> - if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
> + if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_F_LOCK &
> + ~BPF_CPU) > BPF_EXIST))
Maybe create a macro ARRAY_UPDATE_FLAG_MASK similar to the existing
ARRAY_CREATE_FLAG_MASK? This will make it a little easier to follow,
esp. since we have three individual flags here.
There are possibly some other places below that can be handled
in a similar way.
> /* unknown flags */
> return -EINVAL;
>
> @@ -341,7 +342,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
> int cpu, off = 0;
> u32 size;
>
> - if (unlikely(map_flags > BPF_EXIST))
> + if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_CPU) > BPF_EXIST))
~(BPF_F_CPU_MASK | BPF_F_CPU), or create a macro like
ARRAY_UPDATE_CPU_MASK for (BPF_F_CPU_MASK | BPF_F_CPU)?
> /* unknown flags */
> return -EINVAL;
>
> @@ -349,7 +350,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
> /* all elements were pre-allocated, cannot insert a new one */
> return -E2BIG;
>
> - if (unlikely(map_flags == BPF_NOEXIST))
> + if (unlikely(map_flags & BPF_NOEXIST))
> /* all elements already exist */
> return -EEXIST;
>
> @@ -362,9 +363,15 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
> size = round_up(map->value_size, 8);
> rcu_read_lock();
> pptr = array->pptrs[index & array->index_mask];
> - for_each_possible_cpu(cpu) {
> - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
> - off += size;
> + if (map_flags & BPF_CPU) {
> + bpf_long_memcpy(per_cpu_ptr(pptr, map_flags >> 32), value,
> + size);
> + } else {
> + for_each_possible_cpu(cpu) {
> + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off,
> + size);
> + off += size;
> + }
> }
> rcu_read_unlock();
> return 0;
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 22066a62c8c9..be45c7c4509f 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -695,12 +695,12 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
> }
>
> static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
> - void *value, bool onallcpus)
> + void *value, int cpuid)
> {
> - if (!onallcpus) {
> + if (cpuid == -1) {
Magic numbers -1 and -2 should be macros?
> /* copy true value_size bytes */
> memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
> - } else {
> + } else if (cpuid == -2) {
> u32 size = round_up(htab->map.value_size, 8);
> int off = 0, cpu;
>
> @@ -709,6 +709,10 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
[...]
Powered by blists - more mailing lists