lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251218175628.1460321-11-ameryhung@gmail.com>
Date: Thu, 18 Dec 2025 09:56:20 -0800
From: Amery Hung <ameryhung@...il.com>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org,
	alexei.starovoitov@...il.com,
	andrii@...nel.org,
	daniel@...earbox.net,
	memxor@...il.com,
	martin.lau@...nel.org,
	kpsingh@...nel.org,
	yonghong.song@...ux.dev,
	song@...nel.org,
	haoluo@...gle.com,
	ameryhung@...il.com,
	kernel-team@...a.com
Subject: [PATCH bpf-next v3 10/16] bpf: Support lockless unlink when freeing map or local storage

Introduce bpf_selem_unlink_lockless() to properly handle errors returned
from rqspinlock in bpf_local_storage_map_free() and
bpf_local_storage_destroy() where the operation must succeeds.

The idea of bpf_selem_unlink_lockless() is to allow an selem to be
partially linked and use refcount to determine when and who can free the
selem. An selem initially is fully linked to a map and a local storage
and therefore selem->link_cnt is set to 2. Under normal circumstances,
bpf_selem_unlink_lockless() will be able to grab locks and unlink
an selem from map and local storage in sequeunce, just like
bpf_selem_unlink(), and then add it to a local tofree list provide by
the caller. However, if any of the lock attempts fails, it will
only clear SDATA(selem)->smap or selem->local_storage depending on the
caller and decrement link_cnt to signal that the corresponding data
structure holding a reference to the selem is gone. Then, only when both
map and local storage are gone, an selem can be free by the last caller
that turns link_cnt to 0.

To make sure bpf_obj_free_fields() is done only once and when map is
still present, it is called when unlinking an selem from b->list under
b->lock.

To make sure uncharging memory is only done once and when owner is still
present, only unlink selem from local_storage->list in
bpf_local_storage_destroy() and return the amount of memory to uncharge
to the caller (i.e., owner) since the map associated with an selem may
already be gone and map->ops->map_local_storage_uncharge can no longer
be referenced.

Finally, access of selem, SDATA(selem)->smap and selem->local_storage
are racy. Callers will protect these fields with RCU.

Co-developed-by: Martin KaFai Lau <martin.lau@...nel.org>
Signed-off-by: Martin KaFai Lau <martin.lau@...nel.org>
Signed-off-by: Amery Hung <ameryhung@...il.com>
---
 include/linux/bpf_local_storage.h |  2 +-
 kernel/bpf/bpf_local_storage.c    | 77 +++++++++++++++++++++++++++++--
 2 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 20918c31b7e5..1fd908c44fb6 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -80,9 +80,9 @@ struct bpf_local_storage_elem {
 						 * after raw_spin_unlock
 						 */
 	};
+	atomic_t link_cnt;
 	u16 size;
 	bool use_kmalloc_nolock;
-	/* 4 bytes hole */
 	/* The data is stored in another cacheline to minimize
 	 * the number of cachelines access during a cache hit.
 	 */
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 62201552dca6..4c682d5aef7f 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -97,6 +97,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 			if (swap_uptrs)
 				bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
 		}
+		atomic_set(&selem->link_cnt, 2);
 		selem->size = smap->elem_size;
 		selem->use_kmalloc_nolock = smap->use_kmalloc_nolock;
 		return selem;
@@ -200,9 +201,11 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
 	/* The bpf_local_storage_map_free will wait for rcu_barrier */
 	smap = rcu_dereference_check(SDATA(selem)->smap, 1);
 
-	migrate_disable();
-	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-	migrate_enable();
+	if (smap) {
+		migrate_disable();
+		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+		migrate_enable();
+	}
 	kfree_nolock(selem);
 }
 
@@ -227,7 +230,8 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		 * is only supported in task local storage, where
 		 * smap->use_kmalloc_nolock == true.
 		 */
-		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+		if (smap)
+			bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
 		__bpf_selem_free(selem, reuse_now);
 		return;
 	}
@@ -419,6 +423,71 @@ int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
 	return err;
 }
 
+/* Callers of bpf_selem_unlink_lockless() */
+#define BPF_LOCAL_STORAGE_MAP_FREE	0
+#define BPF_LOCAL_STORAGE_DESTROY	1
+
+/*
+ * Unlink an selem from map and local storage with lockless fallback if callers
+ * are racing or rqspinlock returns error. It should only be called by
+ * bpf_local_storage_destroy() or bpf_local_storage_map_free().
+ */
+static void bpf_selem_unlink_lockless(struct bpf_local_storage_elem *selem,
+				      struct hlist_head *to_free, int caller)
+{
+	struct bpf_local_storage *local_storage;
+	struct bpf_local_storage_map_bucket *b;
+	struct bpf_local_storage_map *smap;
+	unsigned long flags;
+	int err, unlink = 0;
+
+	local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held());
+	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+
+	/*
+	 * Free special fields immediately as SDATA(selem)->smap will be cleared.
+	 * No BPF program should be reading the selem.
+	 */
+	if (smap) {
+		b = select_bucket(smap, selem);
+		err = raw_res_spin_lock_irqsave(&b->lock, flags);
+		if (!err) {
+			if (likely(selem_linked_to_map(selem))) {
+				hlist_del_init_rcu(&selem->map_node);
+				bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+				RCU_INIT_POINTER(SDATA(selem)->smap, NULL);
+				unlink++;
+			}
+			raw_res_spin_unlock_irqrestore(&b->lock, flags);
+		} else if (caller == BPF_LOCAL_STORAGE_MAP_FREE) {
+			RCU_INIT_POINTER(SDATA(selem)->smap, NULL);
+		}
+	}
+
+	/*
+	 * Only let destroy() unlink from local_storage->list and do mem_uncharge
+	 * as owner is guaranteed to be valid in destroy().
+	 */
+	if (local_storage && caller == BPF_LOCAL_STORAGE_DESTROY) {
+		err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+		if (!err) {
+			hlist_del_init_rcu(&selem->snode);
+			unlink++;
+			raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+		}
+		RCU_INIT_POINTER(selem->local_storage, NULL);
+	}
+
+	/*
+	 * Normally, an selem can be unlink under local_storage->lock and b->lock, and
+	 * then added to a local to_free list. However, if destroy() and map_free() are
+	 * racing or rqspinlock returns errors in unlikely situations (unlink != 2), free
+	 * the selem only after both map_free() and destroy() drop the refcnt.
+	 */
+	if (unlink == 2 || atomic_dec_and_test(&selem->link_cnt))
+		hlist_add_head(&selem->free_node, to_free);
+}
+
 void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
 				      struct bpf_local_storage_map *smap,
 				      struct bpf_local_storage_elem *selem)
-- 
2.47.3


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ