[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190121201021.30785-1-jakub.kicinski@netronome.com>
Date: Mon, 21 Jan 2019 12:10:21 -0800
From: Jakub Kicinski <jakub.kicinski@...ronome.com>
To: alexei.starovoitov@...il.com, daniel@...earbox.net
Cc: yhs@...com, sdf@...gle.com, brouer@...hat.com,
netdev@...r.kernel.org, oss-drivers@...ronome.com,
Jakub Kicinski <jakub.kicinski@...ronome.com>
Subject: [RFC bpf-next] bpf: allow users to opt out of releasing objects when urefs are gone
Commit c9da161c6517 ("bpf: fix clearing on persistent program array maps")
added protection against dependency loops between programs and maps leading
to zombie objects which would never be freed. FD maps are flushed when
last user reference is gone.
This is confusing to users of bpftool, as updates to tail call maps have
no effect unless those maps are pinned (or held open by another entity).
Today, flushing should not be a requirement for privileged users, as root
has access to GET_FD_BY_ID commands, and therefore can manually flush
the maps. Add a flag to opt out of automatic flushing.
Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@...ronome.com>
---
Hi!
I wonder what the reactions to this (untested) patch would be? I don't
really care strongly, but perhaps others experienced a similar issue?
---
include/uapi/linux/bpf.h | 3 +++
kernel/bpf/arraymap.c | 13 ++++++++++---
kernel/bpf/syscall.c | 9 ++++++++-
net/core/sock_map.c | 3 ++-
4 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c43884f295..771f5cd957a7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -291,6 +291,9 @@ enum bpf_attach_type {
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
+/* Don't flush fd map when last reference is gone */
+#define BPF_F_FD_MAP_NO_UREF_FLUSH (1U << 7)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 25632a75d630..ca8a4504c612 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -23,6 +23,8 @@
#define ARRAY_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define FD_ARRAY_CREATE_FLAG_MASK \
+ (ARRAY_CREATE_FLAG_MASK | BPF_F_FD_MAP_NO_UREF_FLUSH)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -54,7 +56,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
}
/* Called from syscall */
-int array_map_alloc_check(union bpf_attr *attr)
+static int __array_map_alloc_check(union bpf_attr *attr, u32 allowed_flags)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
@@ -62,7 +64,7 @@ int array_map_alloc_check(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
- attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
+ attr->map_flags & ~allowed_flags ||
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
@@ -75,6 +77,11 @@ int array_map_alloc_check(union bpf_attr *attr)
return 0;
}
+int array_map_alloc_check(union bpf_attr *attr)
+{
+ return __array_map_alloc_check(attr, ARRAY_CREATE_FLAG_MASK);
+}
+
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
@@ -431,7 +438,7 @@ static int fd_array_map_alloc_check(union bpf_attr *attr)
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return -EINVAL;
- return array_map_alloc_check(attr);
+ return __array_map_alloc_check(attr, FD_ARRAY_CREATE_FLAG_MASK);
}
static void fd_array_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..42f10766f62e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -123,6 +123,12 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
err = ops->map_alloc_check(attr);
if (err)
return ERR_PTR(err);
+ /* Unprivileged users can't GET_FD_BY_ID so they always
+ * need the flush protection.
+ */
+ if ((attr->map_flags & BPF_F_FD_MAP_NO_UREF_FLUSH) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
}
if (attr->map_ifindex)
ops = &bpf_map_offload_ops;
@@ -293,7 +299,8 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic_dec_and_test(&map->usercnt)) {
- if (map->ops->map_release_uref)
+ if (map->ops->map_release_uref &&
+ !(map->map_flags & BPF_F_FD_MAP_NO_UREF_FLUSH))
map->ops->map_release_uref(map);
}
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index be6092ac69f8..ea5a16d88bec 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -19,7 +19,8 @@ struct bpf_stab {
};
#define SOCK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
+ BPF_F_FD_MAP_NO_UREF_FLUSH)
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
--
2.19.2
Powered by blists - more mailing lists