[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250729182550.185356-2-ameryhung@gmail.com>
Date: Tue, 29 Jul 2025 11:25:39 -0700
From: Amery Hung <ameryhung@...il.com>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org,
alexei.starovoitov@...il.com,
andrii@...nel.org,
daniel@...earbox.net,
memxor@...il.com,
kpsingh@...nel.org,
martin.lau@...nel.org,
yonghong.song@...ux.dev,
song@...nel.org,
haoluo@...gle.com,
ameryhung@...il.com,
kernel-team@...a.com
Subject: [RFC PATCH bpf-next v1 01/11] bpf: Convert bpf_selem_unlink_map to failable
To prepare for changing bpf_local_storage_map_bucket::lock to rqspinlock,
convert bpf_selem_unlink_map() to failable. It still always succeeds and
returns 0 for now.
Since some operations updating local storage cannot fail in the middle,
open-code bpf_selem_unlink_map() to take the b->lock before the
operation. There are three such locations:
- bpf_local_storage_alloc()
The first selem will be unlinked from smap if cmpxchg owner_storage_ptr
fails, and that unlink must not fail. Therefore, hold b->lock from
linking until the allocation completes. Helpers that assume b->lock is
held by callers are introduced: bpf_selem_link_map_nolock() and
bpf_selem_unlink_map_nolock().
- bpf_local_storage_update()
The three-step update process: link_map(new_selem),
link_storage(new_selem), and unlink_map(old_selem) should not fail in
the middle. Hence, take the b->lock of both buckets (the new selem's
and the old selem's) before the update process starts.
While locking two different buckets decided by the hash function
introduces different locking orders, this will not cause an ABBA
deadlock since it is performed under local_storage->lock.
- bpf_selem_unlink()
bpf_selem_unlink_map() and bpf_selem_unlink_storage() should either
all succeed or fail as a whole instead of failing in the middle.
As the first step, open code bpf_selem_unlink_map(). A later patch
will open code bpf_selem_unlink_storage(). Then, unlink_map and
unlink_storage will be done after successfully acquiring both
local_storage->lock and b->lock.
One caller of bpf_selem_unlink_map() cannot run recursively (e.g.,
called by helpers in tracing bpf programs) and therefore cannot deadlock.
Assert that this call cannot fail instead of handling the failure.
- bpf_local_storage_destroy()
Called by the owner (e.g., task_struct, sk, ...). It will not recurse
and therefore cannot cause an AA deadlock.
Signed-off-by: Amery Hung <ameryhung@...il.com>
---
kernel/bpf/bpf_local_storage.c | 75 ++++++++++++++++++++++++++++------
1 file changed, 62 insertions(+), 13 deletions(-)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index b931fbceb54d..7e39b88ef795 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -409,7 +409,7 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
hlist_add_head_rcu(&selem->snode, &local_storage->list);
}
-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
@@ -417,7 +417,7 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
if (unlikely(!selem_linked_to_map_lockless(selem)))
/* selem has already be unlinked from smap */
- return;
+ return 0;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
b = select_bucket(smap, selem);
@@ -425,6 +425,14 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
if (likely(selem_linked_to_map(selem)))
hlist_del_init_rcu(&selem->map_node);
raw_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem)
+{
+ if (likely(selem_linked_to_map(selem)))
+ hlist_del_init_rcu(&selem->map_node);
}
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
@@ -439,13 +447,33 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
raw_spin_unlock_irqrestore(&b->lock, flags);
}
+static void bpf_selem_link_map_nolock(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map_bucket *b)
+{
+ RCU_INIT_POINTER(SDATA(selem)->smap, smap);
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+}
+
void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
{
- /* Always unlink from map before unlinking from local_storage
- * because selem will be freed after successfully unlinked from
- * the local_storage.
- */
- bpf_selem_unlink_map(selem);
+ struct bpf_local_storage_map_bucket *b;
+ struct bpf_local_storage_map *smap;
+ unsigned long flags;
+
+ if (likely(selem_linked_to_map_lockless(selem))) {
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ b = select_bucket(smap, selem);
+ raw_spin_lock_irqsave(&b->lock, flags);
+
+ /* Always unlink from map before unlinking from local_storage
+ * because selem will be freed after successfully unlinked from
+ * the local_storage.
+ */
+ bpf_selem_unlink_map_nolock(selem);
+ raw_spin_unlock_irqrestore(&b->lock, flags);
+ }
+
bpf_selem_unlink_storage(selem, reuse_now);
}
@@ -487,6 +515,8 @@ int bpf_local_storage_alloc(void *owner,
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
int err;
err = mem_charge(smap, owner, sizeof(*storage));
@@ -509,7 +539,10 @@ int bpf_local_storage_alloc(void *owner,
storage->owner = owner;
bpf_selem_link_storage_nolock(storage, first_selem);
- bpf_selem_link_map(smap, first_selem);
+
+ b = select_bucket(smap, first_selem);
+ raw_spin_lock_irqsave(&b->lock, flags);
+ bpf_selem_link_map_nolock(smap, first_selem, b);
owner_storage_ptr =
(struct bpf_local_storage **)owner_storage(smap, owner);
@@ -525,7 +558,8 @@ int bpf_local_storage_alloc(void *owner,
*/
prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
if (unlikely(prev_storage)) {
- bpf_selem_unlink_map(first_selem);
+ bpf_selem_unlink_map_nolock(first_selem);
+ raw_spin_unlock_irqrestore(&b->lock, flags);
err = -EAGAIN;
goto uncharge;
@@ -539,6 +573,7 @@ int bpf_local_storage_alloc(void *owner,
* bucket->list under rcu_read_lock().
*/
}
+ raw_spin_unlock_irqrestore(&b->lock, flags);
return 0;
@@ -560,8 +595,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map_bucket *b, *old_b;
HLIST_HEAD(old_selem_free_list);
- unsigned long flags;
+ unsigned long flags, b_flags, old_b_flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -645,20 +681,31 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
}
+ b = select_bucket(smap, selem);
+ old_b = old_sdata ? select_bucket(smap, SELEM(old_sdata)) : b;
+
+ raw_spin_lock_irqsave(&b->lock, b_flags);
+ if (b != old_b)
+ raw_spin_lock_irqsave(&old_b->lock, old_b_flags);
+
alloc_selem = NULL;
/* First, link the new selem to the map */
- bpf_selem_link_map(smap, selem);
+ bpf_selem_link_map_nolock(smap, selem, b);
/* Second, link (and publish) the new selem to local_storage */
bpf_selem_link_storage_nolock(local_storage, selem);
/* Third, remove old selem, SELEM(old_sdata) */
if (old_sdata) {
- bpf_selem_unlink_map(SELEM(old_sdata));
+ bpf_selem_unlink_map_nolock(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
true, &old_selem_free_list);
}
+ if (b != old_b)
+ raw_spin_unlock_irqrestore(&old_b->lock, old_b_flags);
+ raw_spin_unlock_irqrestore(&b->lock, b_flags);
+
unlock:
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
bpf_selem_free_list(&old_selem_free_list, false);
@@ -736,6 +783,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
HLIST_HEAD(free_selem_list);
struct hlist_node *n;
unsigned long flags;
+ int err;
storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held());
bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL);
@@ -754,7 +802,8 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
/* Always unlink from map before unlinking from
* local_storage.
*/
- bpf_selem_unlink_map(selem);
+ err = bpf_selem_unlink_map(selem);
+ WARN_ON(err);
/* If local_storage list has only one element, the
* bpf_selem_unlink_storage_nolock() will return true.
* Otherwise, it will return false. The current loop iteration
--
2.47.3
Powered by blists - more mailing lists