Message-ID: <153575075452.30050.2166341202758341587.stgit@kernel>
Date: Fri, 31 Aug 2018 23:25:54 +0200
From: Mauricio Vasquez B <mauricio.vasquez@...ito.it>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org
Subject: [RFC PATCH bpf-next v2 1/4] bpf: add bpf queue and stack maps
Implement two new kinds of maps that support the peek, push and pop
operations.
A use case for this is to keep track of a pool of elements, such as the
network ports in a SNAT, as sketched in the example below.
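For illustration only (not part of this patch), a minimal eBPF-side
sketch of such a port pool; the map name, the section names and the
hand-rolled helper declarations are hypothetical and assume headers
generated from the patched uapi:

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include "bpf_helpers.h"

    /* helpers added by this series, declared by hand until
     * bpf_helpers.h learns about them
     */
    static void *(*bpf_map_pop_elem)(void *map) =
            (void *) BPF_FUNC_map_pop_elem;
    static int (*bpf_map_push_elem)(void *map, const void *value,
                                    __u64 flags) =
            (void *) BPF_FUNC_map_push_elem;

    /* pool of free SNAT source ports (hypothetical) */
    struct bpf_map_def SEC("maps") port_pool = {
            .type        = BPF_MAP_TYPE_QUEUE,
            .key_size    = 0, /* queue/stack maps take no key */
            .value_size  = sizeof(__u16),
            .max_entries = 1024,
    };

    SEC("classifier")
    int snat_alloc_port(struct __sk_buff *skb)
    {
            __u16 *port;

            /* take a free port; NULL means the pool is exhausted */
            port = bpf_map_pop_elem(&port_pool);
            if (!port)
                    return TC_ACT_SHOT;

            /* ... rewrite the packet with *port and record the
             * mapping; push the port back once the flow expires ...
             */

            return TC_ACT_OK;
    }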
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@...ito.it>
---
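Notes (illustration only, not part of the patch): there is no libbpf
wrapper for the new BPF_MAP_LOOKUP_AND_DELETE_ELEM command yet, so a
userspace round-trip through the raw syscall could look like the sketch
below, error handling trimmed. With BPF_MAP_TYPE_STACK the same sequence
would return the most recently pushed element instead.

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
                       unsigned int size)
    {
            return syscall(__NR_bpf, cmd, attr, size);
    }

    int main(void)
    {
            union bpf_attr attr = {};
            __u32 value = 42;
            int fd;

            /* a queue of 16 u32 values; key_size must be 0 */
            attr.map_type    = BPF_MAP_TYPE_QUEUE;
            attr.value_size  = sizeof(value);
            attr.max_entries = 16;
            fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));

            /* push: a plain update with a NULL key */
            memset(&attr, 0, sizeof(attr));
            attr.map_fd = fd;
            attr.value  = (__u64)(unsigned long)&value;
            sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

            /* pop: the new command copies the element out and then
             * removes it from the map
             */
            value = 0;
            sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
            printf("popped %u\n", value); /* 42 */
            return 0;
    }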
include/linux/bpf.h | 8 +
include/linux/bpf_types.h | 2
include/uapi/linux/bpf.h | 36 ++++
kernel/bpf/Makefile | 2
kernel/bpf/helpers.c | 44 +++++
kernel/bpf/queue_stack_maps.c | 353 +++++++++++++++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 96 +++++++++++
kernel/bpf/verifier.c | 6 +
net/core/filter.c | 6 +
9 files changed, 543 insertions(+), 10 deletions(-)
create mode 100644 kernel/bpf/queue_stack_maps.c
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..1d39b9096d9f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,11 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+ void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+
+ /* funcs callable from eBPF programs */
+ void *(*map_lookup_or_init_elem)(struct bpf_map *map, void *key,
+ void *value);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -806,6 +811,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..8d955f11f1cd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -67,3 +67,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, queue_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..0a5b904ba42f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
};
enum bpf_map_type {
@@ -127,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
};
enum bpf_prog_type {
@@ -459,6 +462,28 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* into *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for the new one.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * void *bpf_map_pop_elem(struct bpf_map *map)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * A pointer to the element, or *NULL* if the queue/stack is empty.
+ *
+ * void *bpf_map_peek_elem(struct bpf_map *map)
+ * Description
+ * Return an element from *map* without removing it.
+ * Return
+ * A pointer to the element, or *NULL* if the queue/stack is empty.
+ *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -786,14 +811,14 @@ union bpf_attr {
*
* int ret;
* struct bpf_tunnel_key key = {};
- *
+ *
* ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
* if (ret < 0)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* if (key.remote_ipv4 != 0x0a000001)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* return TC_ACT_OK; // accept packet
*
* This interface can also be used with all encapsulation devices
@@ -2226,7 +2251,10 @@ union bpf_attr {
FN(get_current_cgroup_id), \
FN(get_local_storage), \
FN(sk_select_reuseport), \
- FN(skb_ancestor_cgroup_id),
+ FN(skb_ancestor_cgroup_id), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..17afae9e65f3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..c1ff567b69f1 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,6 +76,50 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
+BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
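+ /* queue/stack maps have no key: a push is an update with key == NULL */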
+ return map->ops->map_update_elem(map, NULL, value, flags);
+}
+
+const struct bpf_func_proto bpf_map_push_elem_proto = {
+ .func = bpf_map_push_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_map_pop_elem, struct bpf_map *, map)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return (unsigned long) map->ops->map_lookup_and_delete_elem(map, NULL);
+}
+
+const struct bpf_func_proto bpf_map_pop_elem_proto = {
+ .func = bpf_map_pop_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+};
+
+BPF_CALL_1(bpf_map_peek_elem, struct bpf_map *, map)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return (unsigned long) map->ops->map_lookup_elem(map, NULL);
+}
+
+const struct bpf_func_proto bpf_map_peek_elem_proto = {
+ .func = bpf_map_peek_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+};
+
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.func = bpf_user_rnd_u32,
.gpl_only = false,
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
new file mode 100644
index 000000000000..97e652bf6df5
--- /dev/null
+++ b/kernel/bpf/queue_stack_maps.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queue_stack_maps.c: BPF queue and stack maps
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "percpu_freelist.h"
+
+#define QUEUE_STACK_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+enum queue_type {
+ QUEUE,
+ STACK,
+};
+
+struct bpf_queue {
+ struct bpf_map map;
+ struct list_head head;
+ struct pcpu_freelist freelist;
+ void *nodes;
+ enum queue_type type;
+ raw_spinlock_t lock;
+ atomic_t count;
+ u32 node_size;
+};
+
+struct queue_node {
+ struct pcpu_freelist_node fnode;
+ struct bpf_queue *queue;
+ struct list_head list;
+ struct rcu_head rcu;
+ char element[0] __aligned(8);
+};
+
+static bool queue_map_is_prealloc(struct bpf_queue *queue)
+{
+ return !(queue->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
+/* Called from syscall */
+static int queue_map_alloc_check(union bpf_attr *attr)
+{
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 0 ||
+ attr->value_size == 0 ||
+ attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+ return -EINVAL;
+
+ if (attr->value_size > KMALLOC_MAX_SIZE)
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements.
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
+static int prealloc_init(struct bpf_queue *queue)
+{
+ u32 node_size = sizeof(struct queue_node) +
+ round_up(queue->map.value_size, 8);
+ u32 num_entries = queue->map.max_entries;
+ int err;
+
+ queue->nodes = bpf_map_area_alloc(node_size * num_entries,
+ queue->map.numa_node);
+ if (!queue->nodes)
+ return -ENOMEM;
+
+ err = pcpu_freelist_init(&queue->freelist);
+ if (err)
+ goto free_nodes;
+
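+ /* hand every preallocated node to the freelist; nodes are linked
+ * through their embedded fnode member, hence the offsetof()
+ */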
+ pcpu_freelist_populate(&queue->freelist,
+ queue->nodes +
+ offsetof(struct queue_node, fnode),
+ node_size, num_entries);
+
+ return 0;
+
+free_nodes:
+ bpf_map_area_free(queue->nodes);
+ return err;
+}
+
+static void prealloc_destroy(struct bpf_queue *queue)
+{
+ bpf_map_area_free(queue->nodes);
+ pcpu_freelist_destroy(&queue->freelist);
+}
+
+static struct bpf_map *queue_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_queue *queue;
+ u64 cost = sizeof(*queue);
+ int ret;
+
+ queue = kzalloc(sizeof(*queue), GFP_USER);
+ if (!queue)
+ return ERR_PTR(-ENOMEM);
+
+ bpf_map_init_from_attr(&queue->map, attr);
+
+ queue->node_size = sizeof(struct queue_node) +
+ round_up(attr->value_size, 8);
+ cost += (u64) attr->max_entries * queue->node_size;
+ if (cost >= U32_MAX - PAGE_SIZE) {
+ ret = -E2BIG;
+ goto free_queue;
+ }
+
+ queue->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = bpf_map_precharge_memlock(queue->map.pages);
+ if (ret)
+ goto free_queue;
+
+ INIT_LIST_HEAD(&queue->head);
+
+ raw_spin_lock_init(&queue->lock);
+
+ if (queue->map.map_type == BPF_MAP_TYPE_QUEUE)
+ queue->type = QUEUE;
+ else if (queue->map.map_type == BPF_MAP_TYPE_STACK)
+ queue->type = STACK;
+
+ if (queue_map_is_prealloc(queue))
+ ret = prealloc_init(queue);
+ if (ret)
+ goto free_queue;
+
+ return &queue->map;
+
+free_queue:
+ kfree(queue);
+ return ERR_PTR(ret);
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void queue_map_free(struct bpf_map *map)
+{
+ struct bpf_queue *queue = container_of(map, struct bpf_queue, map);
+ struct queue_node *l, *n;
+
+ /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+ * so the programs (can be more than one that used this map) were
+ * disconnected from events. Wait for outstanding critical sections in
+ * these programs to complete
+ */
+ synchronize_rcu();
+
+ /* some of queue_elem_free_rcu() callbacks for elements of this map may
+ * not have executed. Wait for them.
+ */
+ rcu_barrier();
+ if (!queue_map_is_prealloc(queue)) {
+ /* the node is freed while walking the list, use the _safe variant */
+ list_for_each_entry_safe(l, n, &queue->head, list) {
+ list_del(&l->list);
+ kfree(l);
+ }
+ } else {
+ prealloc_destroy(queue);
+ }
+ kfree(queue);
+}
+
+static void queue_elem_free_rcu(struct rcu_head *head)
+{
+ struct queue_node *l = container_of(head, struct queue_node, rcu);
+ struct bpf_queue *queue = l->queue;
+
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
+ * we're calling kfree, otherwise deadlock is possible if kprobes
+ * are placed somewhere inside of slub
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ if (queue_map_is_prealloc(queue))
+ pcpu_freelist_push(&queue->freelist, &l->fnode);
+ else
+ kfree(l);
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+}
+
+static void *__queue_map_lookup(struct bpf_map *map, bool delete)
+{
+ struct bpf_queue *queue = container_of(map, struct bpf_queue, map);
+ unsigned long flags;
+ struct queue_node *node;
+
+ raw_spin_lock_irqsave(&queue->lock, flags);
+
+ if (list_empty(&queue->head)) {
+ raw_spin_unlock_irqrestore(&queue->lock, flags);
+ return NULL;
+ }
+
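+ /* pushes go to the tail for a queue and to the head for a stack,
+ * so the first entry is the front of the queue or the top of the
+ * stack: the right element for both peek and pop
+ */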
+ node = list_first_entry(&queue->head, struct queue_node, list);
+
+ if (delete) {
+ if (!queue_map_is_prealloc(queue))
+ atomic_dec(&queue->count);
+
+ list_del(&node->list);
+ call_rcu(&node->rcu, queue_elem_free_rcu);
+ }
+
+ raw_spin_unlock_irqrestore(&queue->lock, flags);
+ return &node->element;
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return __queue_map_lookup(map, false);
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_map_lookup_and_delete_elem(struct bpf_map *map, void *key)
+{
+ return __queue_map_lookup(map, true);
+}
+
+static struct queue_node *queue_map_delete_oldest_node(struct bpf_queue *queue)
+{
+ struct queue_node *node = NULL;
+ unsigned long irq_flags;
+
+ raw_spin_lock_irqsave(&queue->lock, irq_flags);
+
+ if (list_empty(&queue->head))
+ goto out;
+
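+ /* the oldest element sits at the head for a queue (tail inserts)
+ * and at the tail for a stack (head inserts)
+ */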
+ switch (queue->type) {
+ case QUEUE:
+ node = list_first_entry(&queue->head, struct queue_node, list);
+ break;
+ case STACK:
+ node = list_last_entry(&queue->head, struct queue_node, list);
+ break;
+ default:
+ goto out;
+ }
+
+ list_del(&node->list);
+out:
+ raw_spin_unlock_irqrestore(&queue->lock, irq_flags);
+ return node;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 flags)
+{
+ struct bpf_queue *queue = container_of(map, struct bpf_queue, map);
+ unsigned long irq_flags;
+ struct queue_node *new;
+ /* BPF_EXIST is used to force making room for a new element in case the
+ * map is full
+ */
+ bool replace = (flags & BPF_EXIST);
+
+ /* Check supported flags for queue and stack maps */
+ if (flags & BPF_NOEXIST || flags > BPF_EXIST)
+ return -EINVAL;
+
+again:
+ if (!queue_map_is_prealloc(queue)) {
+ if (atomic_inc_return(&queue->count) > queue->map.max_entries) {
+ atomic_dec(&queue->count);
+ if (!replace)
+ return -E2BIG;
+ new = queue_map_delete_oldest_node(queue);
+ /* It is possible that in the meanwhile the queue/stack
+ * became empty and there was not an 'oldest' element
+ * to delete. In that case, try again
+ */
+ if (!new)
+ goto again;
+ } else {
+ new = kmalloc_node(queue->node_size,
+ GFP_ATOMIC | __GFP_NOWARN,
+ queue->map.numa_node);
+ if (!new) {
+ atomic_dec(&queue->count);
+ return -ENOMEM;
+ }
+ }
+ } else {
+ struct pcpu_freelist_node *l;
+
+ l = pcpu_freelist_pop(&queue->freelist);
+ if (!l) {
+ if (!replace)
+ return -E2BIG;
+ new = queue_map_delete_oldest_node(queue);
+ if (!new)
+ /* TODO: This should goto again, but this causes an
+ * infinite loop when the elements are not being
+ * returned to the free list by the
+ * queue_elem_free_rcu() callback
+ */
+ return -ENOMEM;
+ } else
+ new = container_of(l, struct queue_node, fnode);
+ }
+
+ memcpy(new->element, value, queue->map.value_size);
+ new->queue = queue;
+
+ raw_spin_lock_irqsave(&queue->lock, irq_flags);
+ switch (queue->type) {
+ case QUEUE:
+ list_add_tail(&new->list, &queue->head);
+ break;
+
+ case STACK:
+ list_add(&new->list, &queue->head);
+ break;
+ }
+ raw_spin_unlock_irqrestore(&queue->lock, irq_flags);
+
+ return 0;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return -EINVAL;
+}
+
+/* Called from syscall */
+static int queue_map_get_next_key(struct bpf_map *map, void *key,
+ void *next_key)
+{
+ return -EINVAL;
+}
+
+const struct bpf_map_ops queue_map_ops = {
+ .map_alloc_check = queue_map_alloc_check,
+ .map_alloc = queue_map_alloc,
+ .map_free = queue_map_free,
+ .map_lookup_elem = queue_map_lookup_elem,
+ .map_lookup_and_delete_elem = queue_map_lookup_and_delete_elem,
+ .map_update_elem = queue_map_update_elem,
+ .map_delete_elem = queue_map_delete_elem,
+ .map_get_next_key = queue_map_get_next_key,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8339d81cba1d..7703795cb4e4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -652,6 +652,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
return -ENOTSUPP;
}
+static void *__bpf_copy_key(void *ukey, u64 key_size)
+{
+ if (key_size)
+ return memdup_user(ukey, key_size);
+
+ if (ukey)
+ return ERR_PTR(-EINVAL);
+
+ return NULL;
+}
+
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
@@ -679,7 +690,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -767,7 +778,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -865,7 +876,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -917,7 +928,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
if (ukey) {
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -958,6 +969,80 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
}
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_user_ptr(attr->key);
+ void __user *uvalue = u64_to_user_ptr(attr->value);
+ int ufd = attr->map_fd;
+ struct bpf_map *map;
+ void *key, *value, *ptr;
+ u32 value_size;
+ struct fd f;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ key = __bpf_copy_key(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
+ goto err_put;
+ }
+
+ value_size = map->value_size;
+
+ err = -ENOMEM;
+ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ if (!value)
+ goto free_key;
+
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+ * inside bpf map update or delete otherwise deadlocks are possible
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ rcu_read_lock();
+ ptr = map->ops->map_lookup_and_delete_elem(map, key);
+ if (ptr)
+ memcpy(value, ptr, value_size);
+ rcu_read_unlock();
+ err = ptr ? 0 : -ENOENT;
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+
+ if (err)
+ goto free_value;
+
+ err = -EFAULT;
+ if (copy_to_user(uvalue, value, value_size) != 0)
+ goto free_value;
+
+ err = 0;
+
+free_value:
+ kfree(value);
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2418,6 +2503,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
+ case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+ err = map_lookup_and_delete_elem(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ca90679a7fe5..5bd67feb2f07 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2035,6 +2035,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
verbose(env, "invalid map_ptr to access map->key\n");
return -EACCES;
}
+
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
@@ -2454,7 +2455,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
if (func_id != BPF_FUNC_tail_call &&
func_id != BPF_FUNC_map_lookup_elem &&
func_id != BPF_FUNC_map_update_elem &&
- func_id != BPF_FUNC_map_delete_elem)
+ func_id != BPF_FUNC_map_delete_elem &&
+ func_id != BPF_FUNC_map_push_elem &&
+ func_id != BPF_FUNC_map_pop_elem &&
+ func_id != BPF_FUNC_map_peek_elem)
return 0;
if (meta->map_ptr == NULL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..e7860914ffc7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4830,6 +4830,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id: