[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAADnVQJMM+PSq_nDL4rXbC42D+yX5iRo-G_y8qma5+OepcAESw@mail.gmail.com>
Date: Tue, 25 Nov 2025 15:11:13 -0800
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: Leon Hwang <leon.hwang@...ux.dev>
Cc: bpf <bpf@...r.kernel.org>, Alexei Starovoitov <ast@...nel.org>,
Andrii Nakryiko <andrii@...nel.org>, Daniel Borkmann <daniel@...earbox.net>, Jiri Olsa <jolsa@...nel.org>,
Yonghong Song <yonghong.song@...ux.dev>, Song Liu <song@...nel.org>, Eduard <eddyz87@...il.com>,
Daniel Xu <dxu@...uu.xyz>, Daniel Müller <deso@...teo.net>,
Martin KaFai Lau <martin.lau@...ux.dev>, John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>, Stanislav Fomichev <sdf@...ichev.me>, Hao Luo <haoluo@...gle.com>,
Shuah Khan <shuah@...nel.org>, Jason Xing <kerneljasonxing@...il.com>,
Tao Chen <chen.dylane@...ux.dev>, Willem de Bruijn <willemb@...gle.com>,
Paul Chaignon <paul.chaignon@...il.com>, Anton Protopopov <a.s.protopopov@...il.com>,
Kumar Kartikeya Dwivedi <memxor@...il.com>, Mykyta Yatsenko <yatsenko@...a.com>,
Tobias Klauser <tklauser@...tanz.ch>, kernel-patches-bot@...com,
LKML <linux-kernel@...r.kernel.org>,
"open list:KERNEL SELFTEST FRAMEWORK" <linux-kselftest@...r.kernel.org>
Subject: Re: [PATCH bpf-next v11 4/8] bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS
flags support for percpu_hash and lru_percpu_hash maps
On Tue, Nov 25, 2025 at 7:00 AM Leon Hwang <leon.hwang@...ux.dev> wrote:
>
> Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash
> maps to allow updating values for all CPUs with a single value for both
> update_elem and update_batch APIs.
>
> Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash
> maps to allow:
>
> * update value for specified CPU for both update_elem and update_batch
> APIs.
> * lookup value for specified CPU for both lookup_elem and lookup_batch
> APIs.
>
> The BPF_F_CPU flag is passed via:
>
> * map_flags along with embedded cpu info.
> * elem_flags along with embedded cpu info.
>
> Signed-off-by: Leon Hwang <leon.hwang@...ux.dev>
> ---
> v10 -> v11:
> - Drop buggy '(u32)map_flags > BPF_F_ALL_CPUS' check in htab_map_check_update_flags().
why?
> - Update 'map_flags != BPF_EXIST' to '!(map_flags & BPF_EXIST)' in
> __htab_lru_percpu_map_update_elem().
> ---
> include/linux/bpf.h | 4 +-
> kernel/bpf/hashtab.c | 96 ++++++++++++++++++++++++++++++--------------
> kernel/bpf/syscall.c | 2 +-
> 3 files changed, 69 insertions(+), 33 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 01a99e3a3e51..f79d2ae27335 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2761,7 +2761,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
> struct bpf_func_state *caller,
> struct bpf_func_state *callee);
>
> -int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
> +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
> int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
> int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
> u64 flags);
> @@ -3833,6 +3833,8 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
> {
> switch (map_type) {
> case BPF_MAP_TYPE_PERCPU_ARRAY:
> + case BPF_MAP_TYPE_PERCPU_HASH:
> + case BPF_MAP_TYPE_LRU_PERCPU_HASH:
> return true;
> default:
> return false;
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index c8a9b27f8663..c768bf71d60f 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -932,7 +932,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
> }
>
> static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
> - void *value, bool onallcpus)
> + void *value, bool onallcpus, u64 map_flags)
> {
> void *ptr;
>
> @@ -943,19 +943,28 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
> bpf_obj_free_fields(htab->map.record, ptr);
> } else {
> u32 size = round_up(htab->map.value_size, 8);
> - int off = 0, cpu;
> + void *val;
> + int cpu;
> +
> + if (map_flags & BPF_F_CPU) {
> + cpu = map_flags >> 32;
> + ptr = per_cpu_ptr(pptr, cpu);
> + copy_map_value(&htab->map, ptr, value);
> + bpf_obj_free_fields(htab->map.record, ptr);
> + return;
> + }
>
> for_each_possible_cpu(cpu) {
> ptr = per_cpu_ptr(pptr, cpu);
> - copy_map_value_long(&htab->map, ptr, value + off);
> + val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
> + copy_map_value(&htab->map, ptr, val);
> bpf_obj_free_fields(htab->map.record, ptr);
> - off += size;
> }
> }
> }
>
> static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
> - void *value, bool onallcpus)
> + void *value, bool onallcpus, u64 map_flags)
> {
> /* When not setting the initial value on all cpus, zero-fill element
> * values for other cpus. Otherwise, bpf program has no way to ensure
> @@ -973,7 +982,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
> zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
> }
> } else {
> - pcpu_copy_value(htab, pptr, value, onallcpus);
> + pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
> }
> }
>
> @@ -985,7 +994,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
> static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> void *value, u32 key_size, u32 hash,
> bool percpu, bool onallcpus,
> - struct htab_elem *old_elem)
> + struct htab_elem *old_elem, u64 map_flags)
> {
> u32 size = htab->map.value_size;
> bool prealloc = htab_is_prealloc(htab);
> @@ -1043,7 +1052,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> pptr = *(void __percpu **)ptr;
> }
>
> - pcpu_init_value(htab, pptr, value, onallcpus);
> + pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
>
> if (!prealloc)
> htab_elem_set_ptr(l_new, key_size, pptr);
> @@ -1147,7 +1156,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
> }
>
> l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
> - l_old);
> + l_old, map_flags);
> if (IS_ERR(l_new)) {
> /* all pre-allocated elements are in use or memory exhausted */
> ret = PTR_ERR(l_new);
> @@ -1249,6 +1258,15 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
> return ret;
> }
>
> +static int htab_map_check_update_flags(bool onallcpus, u64 map_flags)
> +{
> + if (unlikely(!onallcpus && map_flags > BPF_EXIST))
> + return -EINVAL;
> + if (unlikely(onallcpus && (map_flags & BPF_F_LOCK)))
> + return -EINVAL;
> + return 0;
> +}
> +
> static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
> void *value, u64 map_flags,
> bool percpu, bool onallcpus)
> @@ -1262,9 +1280,9 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
> u32 key_size, hash;
> int ret;
>
> - if (unlikely(map_flags > BPF_EXIST))
> - /* unknown flags */
> - return -EINVAL;
> + ret = htab_map_check_update_flags(onallcpus, map_flags);
> + if (unlikely(ret))
> + return ret;
>
> WARN_ON_ONCE(!bpf_rcu_lock_held());
>
> @@ -1289,7 +1307,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
> /* Update value in-place */
> if (percpu) {
> pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
> - value, onallcpus);
> + value, onallcpus, map_flags);
> } else {
> void **inner_map_pptr = htab_elem_value(l_old, key_size);
>
> @@ -1298,7 +1316,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
> }
> } else {
> l_new = alloc_htab_elem(htab, key, value, key_size,
> - hash, percpu, onallcpus, NULL);
> + hash, percpu, onallcpus, NULL, map_flags);
> if (IS_ERR(l_new)) {
> ret = PTR_ERR(l_new);
> goto err;
> @@ -1324,9 +1342,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
> u32 key_size, hash;
> int ret;
>
> - if (unlikely(map_flags > BPF_EXIST))
> - /* unknown flags */
> - return -EINVAL;
> + ret = htab_map_check_update_flags(onallcpus, map_flags);
> + if (unlikely(ret))
> + return ret;
>
> WARN_ON_ONCE(!bpf_rcu_lock_held());
>
> @@ -1342,7 +1360,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
> * to remove older elem from htab and this removal
> * operation will need a bucket lock.
> */
> - if (map_flags != BPF_EXIST) {
> + if (!(map_flags & BPF_EXIST)) {
> l_new = prealloc_lru_pop(htab, key, hash);
> if (!l_new)
> return -ENOMEM;
It's not in the diff, but this is broken.
You tried to allow the BPF_EXIST combination here, but didn't update
check_flags(),
so for a BPF_[NO]EXIST | BPF_F_CPU combination check_flags() will always
return 0, and the BPF_[NO]EXIST flag will make no difference.
When you add features, always always add unit tests.
Patch 8 is not it. It's testing F_CPU. It doesn't check
that BPF_EXIST | BPF_F_CPU correctly errors when an element doesn't exist.
v10 was close, but then you decided to add this BPF_EXIST feature
and did it in a sloppy way. Why?
Focus on one thing only. Land it and then do the next one.
11 revisions and still no go... it is not a good sign.
pw-bot: cr
Powered by blists - more mailing lists