lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  5 Oct 2022 10:17:06 -0700
From:   Cong Wang <xiyou.wangcong@...il.com>
To:     netdev@...r.kernel.org
Cc:     yangpeihao@...u.edu.cn, toke@...hat.com, jhs@...atatu.com,
        jiri@...nulli.us, bpf@...r.kernel.org, sdf@...gle.com,
        Cong Wang <cong.wang@...edance.com>
Subject: [RFC Patch v6 2/5] bpf: Add map in map support to rbtree

From: Cong Wang <cong.wang@...edance.com>

Signed-off-by: Cong Wang <cong.wang@...edance.com>
---
 include/linux/bpf.h       |   4 +
 include/linux/bpf_types.h |   1 +
 include/uapi/linux/bpf.h  |   1 +
 kernel/bpf/rbtree.c       | 158 ++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c      |   7 ++
 5 files changed, 171 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9e7d46d16032..d4d85df1e8ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1913,6 +1913,10 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_fd_rbtree_map_update_elem(struct bpf_map *map, struct file *map_file,
+				  void *key, void *value, u64 map_flags);
+int bpf_fd_rbtree_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_fd_rbtree_map_pop_elem(struct bpf_map *map, void *value);
 
 int bpf_get_file_flag(int flags);
 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c53ba6de1613..d1ef13b08e28 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -128,6 +128,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_RBTREE, rbtree_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_RBTREE_OF_MAPS, rbtree_map_in_map_ops)
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9492cd3af701..994a3e42a4fa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -936,6 +936,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_BLOOM_FILTER,
 	BPF_MAP_TYPE_USER_RINGBUF,
 	BPF_MAP_TYPE_RBTREE,
+	BPF_MAP_TYPE_RBTREE_OF_MAPS,
 };
 
 /* Note that tracing related programs such as
diff --git a/kernel/bpf/rbtree.c b/kernel/bpf/rbtree.c
index f1a9b1c40b8b..43d3d4193ce4 100644
--- a/kernel/bpf/rbtree.c
+++ b/kernel/bpf/rbtree.c
@@ -12,6 +12,7 @@
 #include <linux/bpf_mem_alloc.h>
 #include <linux/math.h>
 #include <linux/seq_file.h>
+#include "map_in_map.h"
 
 #define RBTREE_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
@@ -443,3 +444,160 @@ const struct bpf_map_ops rbtree_map_ops = {
 	.iter_seq_info = &rbtree_map_iter_seq_info,
 };
 
+static struct bpf_map *rbtree_map_in_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map, *inner_map_meta;
+
+	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+	if (IS_ERR(inner_map_meta))
+		return inner_map_meta;
+
+	map = rbtree_map_alloc(attr);
+	if (IS_ERR(map)) {
+		bpf_map_meta_free(inner_map_meta);
+		return map;
+	}
+
+	map->inner_map_meta = inner_map_meta;
+	return map;
+}
+
+static void *fd_rbtree_map_get_ptr(const struct bpf_map *map, struct rbtree_elem *e)
+{
+	return *(void **)(e->key + roundup(map->key_size, 8));
+}
+
+static void rbtree_map_in_map_purge(struct bpf_map *map)
+{
+	struct rbtree_map *rb = rbtree_map(map);
+	struct rbtree_elem *e, *tmp;
+
+	rbtree_walk_safe(e, tmp, &rb->root) {
+		void *ptr = fd_rbtree_map_get_ptr(map, e);
+
+		map->ops->map_fd_put_ptr(ptr);
+	}
+}
+
+static void rbtree_map_in_map_free(struct bpf_map *map)
+{
+	struct rbtree_map *rb = rbtree_map(map);
+
+	bpf_map_meta_free(map->inner_map_meta);
+	rbtree_map_in_map_purge(map);
+	bpf_map_area_free(rb);
+}
+
+/* Called from eBPF program */
+static void *rbtree_map_in_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_map **inner_map = rbtree_map_lookup_elem(map, key);
+
+	if (!inner_map)
+		return NULL;
+
+	return READ_ONCE(*inner_map);
+}
+
+static int rbtree_map_in_map_alloc_check(union bpf_attr *attr)
+{
+	if (attr->value_size != sizeof(u32))
+		return -EINVAL;
+	return rbtree_map_alloc_check(attr);
+}
+
+/* Called from eBPF program */
+static int rbtree_map_in_map_pop_elem(struct bpf_map *map, void *value)
+{
+	struct rbtree_map *rb = rbtree_map(map);
+	struct rbtree_elem *e = elem_rb_first(&rb->root);
+	struct bpf_map **inner_map;
+	unsigned long flags;
+
+	if (!e)
+		return -ENOENT;
+	raw_spin_lock_irqsave(&rb->lock, flags);
+	rb_erase(&e->rbnode, &rb->root);
+	raw_spin_unlock_irqrestore(&rb->lock, flags);
+	inner_map = fd_rbtree_map_get_ptr(map, e);
+	*(void **)value = *inner_map;
+	bpf_mem_cache_free(&rb->ma, e);
+	atomic_dec(&rb->nr_entries);
+	return 0;
+}
+
+/* only called from syscall */
+int bpf_fd_rbtree_map_pop_elem(struct bpf_map *map, void *value)
+{
+	struct bpf_map *ptr;
+	int ret = 0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	ret = rbtree_map_in_map_pop_elem(map, &ptr);
+	if (!ret)
+		*(u32 *)value = map->ops->map_fd_sys_lookup_elem(ptr);
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* only called from syscall */
+int bpf_fd_rbtree_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+	void **ptr;
+	int ret = 0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	ptr = rbtree_map_lookup_elem(map, key);
+	if (ptr)
+		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* only called from syscall */
+int bpf_fd_rbtree_map_update_elem(struct bpf_map *map, struct file *map_file,
+				  void *key, void *value, u64 map_flags)
+{
+	void *ptr;
+	int ret;
+	u32 ufd = *(u32 *)value;
+
+	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	ret = rbtree_map_update_elem(map, key, &ptr, map_flags);
+	if (ret)
+		map->ops->map_fd_put_ptr(ptr);
+
+	return ret;
+}
+
+const struct bpf_map_ops rbtree_map_in_map_ops = {
+	.map_alloc_check = rbtree_map_in_map_alloc_check,
+	.map_alloc = rbtree_map_in_map_alloc,
+	.map_free = rbtree_map_in_map_free,
+	.map_get_next_key = rbtree_map_get_next_key,
+	.map_lookup_elem = rbtree_map_in_map_lookup_elem,
+	.map_update_elem = rbtree_map_update_elem,
+	.map_pop_elem = rbtree_map_in_map_pop_elem,
+	.map_delete_elem = rbtree_map_delete_elem,
+	.map_fd_get_ptr = bpf_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+	.map_check_btf = map_check_no_btf,
+	.map_btf_id = &rbtree_map_btf_ids[0],
+};
+
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7b373a5e861f..1b968dc38500 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -213,6 +213,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
 		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 						  flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_RBTREE_OF_MAPS) {
+		rcu_read_lock();
+		err = bpf_fd_rbtree_map_update_elem(map, f.file, key, value,
+						    flags);
+		rcu_read_unlock();
 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 		/* rcu_read_lock() is not needed */
 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
@@ -1832,6 +1837,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 	    map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_pop_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_RBTREE_OF_MAPS) {
+		bpf_fd_rbtree_map_pop_elem(map, value);
 	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
 		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
-- 
2.34.1

Powered by blists - more mailing lists