linux-kernel - [WIP 1/4] bpf: Respect persistent map and prog access modes

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <3a5ddc41c25e00e8590f6da414bcf0ed7626ffe1.1565040372.git.luto@kernel.org>
Date:   Mon,  5 Aug 2019 14:29:02 -0700
From:   Andy Lutomirski <luto@...nel.org>
To:     LKML <linux-kernel@...r.kernel.org>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>
Cc:     Song Liu <songliubraving@...com>,
        Kees Cook <keescook@...omium.org>,
        Networking <netdev@...r.kernel.org>, bpf <bpf@...r.kernel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Alexei Starovoitov <ast@...nel.org>,
        Kernel Team <Kernel-team@...com>,
        Lorenz Bauer <lmb@...udflare.com>,
        Jann Horn <jannh@...gle.com>,
        Greg KH <gregkh@...uxfoundation.org>,
        Linux API <linux-api@...r.kernel.org>,
        LSM List <linux-security-module@...r.kernel.org>,
        Andy Lutomirski <luto@...nel.org>
Subject: [WIP 1/4] bpf: Respect persistent map and prog access modes

In the interest of making bpf() more useful by unprivileged users,
this patch teaches bpf to respect access modes on map and prog
inodes.  The permissions are:

R on a map: read the map
W on a map: write the map

Referencing a map from a program should require RW.

R on a prog: Read or introspect the prog
W on a prog: Attach the prog to something

Test-running a prog is a form of introspection, so it requires RW.
Detaching a prog merely uses the fd for identification, so neither R
nor W is needed.

This is likely incomplete, and it has some comments that should be
removed.

This patch uses WRITE instead of EXEC as the permission needed to
run (by attaching or test-running) a program.  EXEC seems nicer, but
O_MAYEXEC isn't merged, which makes using X awkward.
---
 include/linux/bpf.h    | 15 +++++++------
 kernel/bpf/arraymap.c  |  8 ++++++-
 kernel/bpf/cgroup.c    |  6 ++++-
 kernel/bpf/inode.c     | 25 ++++++++++++++-------
 kernel/bpf/syscall.c   | 51 ++++++++++++++++++++++++++++++------------
 kernel/events/core.c   |  5 +++--
 net/core/dev.c         |  4 +++-
 net/core/filter.c      |  8 ++++---
 net/netfilter/xt_bpf.c |  5 +++--
 net/packet/af_packet.c |  2 +-
 10 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 18f4cc2c6acd..2d5e1a4dff6c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -630,9 +630,9 @@ extern const struct bpf_prog_ops bpf_offload_prog_ops;
 extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
 extern const struct bpf_verifier_ops xdp_analyzer_ops;
 
-struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_get(u32 ufd, int mask);
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
-				       bool attach_drv);
+				       bool attach_drv, int mask);
 struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
 void bpf_prog_sub(struct bpf_prog *prog, int i);
 struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
@@ -662,7 +662,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 extern int sysctl_unprivileged_bpf_disabled;
 
 int bpf_map_new_fd(struct bpf_map *map, int flags);
-int bpf_prog_new_fd(struct bpf_prog *prog);
+int bpf_prog_new_fd(struct bpf_prog *prog, int flags);
 
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
@@ -733,7 +733,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 		attr->numa_node : NUMA_NO_NODE;
 }
 
-struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type, int mask);
 int array_map_alloc_check(union bpf_attr *attr);
 
 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -850,7 +850,7 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 }
 
 static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
-				enum bpf_prog_type type)
+				enum bpf_prog_type type, int mask)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -878,9 +878,10 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
-						 enum bpf_prog_type type)
+						 enum bpf_prog_type type,
+						 int mask)
 {
-	return bpf_prog_get_type_dev(ufd, type, false);
+	return bpf_prog_get_type_dev(ufd, type, false, mask);
 }
 
 bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..7e17a5d42110 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -522,6 +522,10 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
 }
 
 /* only called from syscall */
+/*
+ * XXX: it's totally unclear to me what this ends up doing with the fd
+ * in general.
+ */
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags)
 {
@@ -569,7 +573,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
 				   struct file *map_file, int fd)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct bpf_prog *prog = bpf_prog_get(fd);
+
+	/* XXX: what, exactly, does this end up doing to the prog in question? */
+	struct bpf_prog *prog = bpf_prog_get(fd, FMODE_READ | FMODE_WRITE);
 
 	if (IS_ERR(prog))
 		return prog;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 0a00eaca6fae..1450c3bdab82 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -562,7 +562,11 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
 	if (IS_ERR(cgrp))
 		return PTR_ERR(cgrp);
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	/*
+	 * No particular access required -- this only uses the fd to identify
+	 * a program, not to do anything with the program.
+	 */
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype, 0);
 	if (IS_ERR(prog))
 		prog = NULL;
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index cc0d0cf114e3..cb07736b33ae 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -58,7 +58,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
 	}
 }
 
-static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
+static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type, int mask)
 {
 	void *raw;
 
@@ -66,7 +66,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
 	raw = bpf_map_get_with_uref(ufd);
 	if (IS_ERR(raw)) {
 		*type = BPF_TYPE_PROG;
-		raw = bpf_prog_get(ufd);
+		raw = bpf_prog_get(ufd, mask);
 	}
 
 	return raw;
@@ -430,7 +430,12 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 	if (IS_ERR(pname))
 		return PTR_ERR(pname);
 
-	raw = bpf_fd_probe_obj(ufd, &type);
+	/*
+	 * Pinning an object effectively grants the caller all access, because
+	 * the caller ends up owning the inode.  So require all access.
+	 * XXX: If we use FMODE_EXEC, we should require FMODE_EXEC too.
+	 */
+	raw = bpf_fd_probe_obj(ufd, &type, FMODE_READ | FMODE_WRITE);
 	if (IS_ERR(raw)) {
 		ret = PTR_ERR(raw);
 		goto out;
@@ -456,6 +461,10 @@ static void *bpf_obj_do_get(const struct filename *pathname,
 	if (ret)
 		return ERR_PTR(ret);
 
+	/*
+	 * XXX: O_MAYEXEC doesn't exist, which is problematic here if we
+	 * want to use FMODE_EXEC.
+	 */
 	inode = d_backing_inode(path.dentry);
 	ret = inode_permission(inode, ACC_MODE(flags));
 	if (ret)
@@ -499,7 +508,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 	}
 
 	if (type == BPF_TYPE_PROG)
-		ret = bpf_prog_new_fd(raw);
+		ret = bpf_prog_new_fd(raw, f_flags);
 	else if (type == BPF_TYPE_MAP)
 		ret = bpf_map_new_fd(raw, f_flags);
 	else
@@ -512,10 +521,10 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 	return ret;
 }
 
-static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type, int mask)
 {
 	struct bpf_prog *prog;
-	int ret = inode_permission(inode, MAY_READ);
+	int ret = inode_permission(inode, mask);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -536,14 +545,14 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
 	return bpf_prog_inc(prog);
 }
 
-struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type, int mask)
 {
 	struct bpf_prog *prog;
 	struct path path;
 	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
 	if (ret)
 		return ERR_PTR(ret);
-	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
+	prog = __get_prog_inode(d_backing_inode(path.dentry), type, mask);
 	if (!IS_ERR(prog))
 		touch_atime(&path);
 	path_put(&path);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5d141f16f6fa..23f8f89d2a86 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -447,6 +447,7 @@ int bpf_map_new_fd(struct bpf_map *map, int flags)
 
 int bpf_get_file_flag(int flags)
 {
+	/* XXX: What about exec? */
 	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
 		return -EINVAL;
 	if (flags & BPF_F_RDONLY)
@@ -556,6 +557,10 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		return -EINVAL;
 
+	/*
+	 * XXX: I'm a bit confused.  Why would you ever create a map and
+	 * grant *yourself* less than full permission?
+	 */
 	f_flags = bpf_get_file_flag(attr->map_flags);
 	if (f_flags < 0)
 		return f_flags;
@@ -1411,7 +1416,7 @@ const struct file_operations bpf_prog_fops = {
 	.write		= bpf_dummy_write,
 };
 
-int bpf_prog_new_fd(struct bpf_prog *prog)
+int bpf_prog_new_fd(struct bpf_prog *prog, int flags)
 {
 	int ret;
 
@@ -1420,10 +1425,10 @@ int bpf_prog_new_fd(struct bpf_prog *prog)
 		return ret;
 
 	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
-				O_RDWR | O_CLOEXEC);
+				flags | O_CLOEXEC);
 }
 
-static struct bpf_prog *____bpf_prog_get(struct fd f)
+static struct bpf_prog *____bpf_prog_get(struct fd f, int mask)
 {
 	if (!f.file)
 		return ERR_PTR(-EBADF);
@@ -1431,6 +1436,10 @@ static struct bpf_prog *____bpf_prog_get(struct fd f)
 		fdput(f);
 		return ERR_PTR(-EINVAL);
 	}
+	if ((f.file->f_mode & mask) != mask) {
+		fdput(f);
+		return ERR_PTR(-EACCES);
+	}
 
 	return f.file->private_data;
 }
@@ -1497,12 +1506,12 @@ bool bpf_prog_get_ok(struct bpf_prog *prog,
 }
 
 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
-				       bool attach_drv)
+				       bool attach_drv, int mask)
 {
 	struct fd f = fdget(ufd);
 	struct bpf_prog *prog;
 
-	prog = ____bpf_prog_get(f);
+	prog = ____bpf_prog_get(f, mask);
 	if (IS_ERR(prog))
 		return prog;
 	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
@@ -1516,15 +1525,15 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
 	return prog;
 }
 
-struct bpf_prog *bpf_prog_get(u32 ufd)
+struct bpf_prog *bpf_prog_get(u32 ufd, int mask)
 {
-	return __bpf_prog_get(ufd, NULL, false);
+	return __bpf_prog_get(ufd, NULL, false, mask);
 }
 
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
-				       bool attach_drv)
+				       bool attach_drv, int mask)
 {
-	return __bpf_prog_get(ufd, &type, attach_drv);
+	return __bpf_prog_get(ufd, &type, attach_drv, mask);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
 
@@ -1707,7 +1716,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (err)
 		goto free_used_maps;
 
-	err = bpf_prog_new_fd(prog);
+	err = bpf_prog_new_fd(prog, O_RDWR /* | O_MAYEXEC */);
 	if (err < 0) {
 		/* failed to allocate fd.
 		 * bpf_prog_put() is needed because the above
@@ -1808,7 +1817,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	}
 	raw_tp->btp = btp;
 
-	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
+	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd, MAY_EXEC);
 	if (IS_ERR(prog)) {
 		err = PTR_ERR(prog);
 		goto out_free_tp;
@@ -1929,7 +1938,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype, MAY_EXEC);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -2083,7 +2092,11 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
 	    (!attr->test.ctx_size_out && attr->test.ctx_out))
 		return -EINVAL;
 
-	prog = bpf_prog_get(attr->test.prog_fd);
+	/*
+	 * A test run is is a form of query, so require RW.  Using W as a proxy for
+	 * X, since X is awkward due to a lack of O_MAYEXEC.
+	 */
+	prog = bpf_prog_get(attr->test.prog_fd, MAY_READ | MAY_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -2147,7 +2160,11 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	fd = bpf_prog_new_fd(prog);
+	/*
+	 * We have all permissions.  This is okay, since we also require
+	 * CAP_SYS_ADMIN to do this at all.
+	 */
+	fd = bpf_prog_new_fd(prog, O_RDWR /* | O_MAYEXEC */);
 	if (fd < 0)
 		bpf_prog_put(prog);
 
@@ -2638,6 +2655,11 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 	if (!f.file)
 		return -EBADFD;
 
+	if (!(f.file->f_mode & FMODE_READ)) {
+		err = -EACCES;
+		goto out;
+	}
+
 	if (f.file->f_op == &bpf_prog_fops)
 		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
 					      uattr);
@@ -2649,6 +2671,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 	else
 		err = -EINVAL;
 
+out:
 	fdput(f);
 	return err;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..f2e3973b28f2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8876,7 +8876,8 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
 	if (event->prog)
 		return -EEXIST;
 
-	prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+	/* Should maybe be FMODE_EXEC? */
+	prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT, FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -8942,7 +8943,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 		/* bpf programs can only be attached to u/kprobe or tracepoint */
 		return -EINVAL;
 
-	prog = bpf_prog_get(prog_fd);
+	prog = bpf_prog_get(prog_fd, FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..3fcaeae693bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8093,8 +8093,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			return -EBUSY;
 		}
 
+		/* XXX: FMODE_EXEC? */
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
-					     bpf_op == ops->ndo_bpf);
+					     bpf_op == ops->ndo_bpf,
+					     FMODE_WRITE);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 4e2a79b2fd77..9282462678fd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1544,7 +1544,8 @@ static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return ERR_PTR(-EPERM);
 
-	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+	/* FMODE_EXEC? */
+	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 }
 
 int sk_attach_bpf(u32 ufd, struct sock *sk)
@@ -1572,9 +1573,10 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return -EPERM;
 
-	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 	if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
-		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
+		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT,
+					 FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 13cf3f9b5938..34e5c08ee1f3 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -44,7 +44,7 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 {
 	struct bpf_prog *prog;
 
-	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER, MAY_EXEC);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -57,7 +57,8 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
 	if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
 		return -EINVAL;
 
-	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER,
+				      MAY_EXEC);
 	return PTR_ERR_OR_ZERO(*ret);
 }
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8d54f3047768..5b8c5e5d94bf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1563,7 +1563,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
 	if (copy_from_user(&fd, data, len))
 		return -EFAULT;
 
-	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
-- 
2.21.0