linux-kernel - [PATCH bpf-next 1/5] bpf: introduce BPF_PROG_TYPE

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200801084721.1812607-2-songliubraving@fb.com>
Date:   Sat, 1 Aug 2020 01:47:17 -0700
From:   Song Liu <songliubraving@...com>
To:     <linux-kernel@...r.kernel.org>, <bpf@...r.kernel.org>,
        <netdev@...r.kernel.org>
CC:     <ast@...nel.org>, <daniel@...earbox.net>, <kernel-team@...com>,
        <john.fastabend@...il.com>, <kpsingh@...omium.org>,
        <brouer@...hat.com>, <dlxu@...com>,
        Song Liu <songliubraving@...com>
Subject: [PATCH bpf-next 1/5] bpf: introduce BPF_PROG_TYPE_USER

As of today, to trigger BPF program from user space, the common practise
is to create a uprobe on a special function and calls that function. For
example, bpftrace uses BEGIN_trigger and END_trigger for the BEGIN and END
programs.

However, uprobe is not ideal for this use case. First, uprobe uses trap,
which adds non-trivial overhead. Second, uprobe requires calculating
function offset at runtime, which is not very reliable. bpftrace has
seen issues with this:
  https://github.com/iovisor/bpftrace/pull/1438
  https://github.com/iovisor/bpftrace/issues/1440

This patch introduces a new BPF program type BPF_PROG_TYPE_USER, or "user
program". User program is triggered via sys_bpf(BPF_PROG_TEST_RUN), which
is significant faster than a trap.

To make user program more flexible, we enabled the following features:
  1. The user can specify on which cpu the program should run. If the
     target cpu is not current cpu, the program is triggered via IPI.
  2. User can pass optional argument to user program. Currently, the
     argument can only be 5x u64 numbers.

User program has access to helper functions in bpf_tracing_func_proto()
and bpf_get_stack|stackid().

Signed-off-by: Song Liu <songliubraving@...com>
---
 include/linux/bpf_types.h      |   2 +
 include/uapi/linux/bpf.h       |  19 ++++++
 kernel/bpf/syscall.c           |   3 +-
 kernel/trace/bpf_trace.c       | 121 +++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  19 ++++++
 5 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a52a5688418e5..3c52f3207aced 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -76,6 +76,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension,
 BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 	       void *, void *)
 #endif /* CONFIG_BPF_LSM */
+BPF_PROG_TYPE(BPF_PROG_TYPE_USER, user,
+	       void *, void *)
 #endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index eb5e0c38eb2cf..f6b9d4e7eeb4e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -190,6 +190,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_USER,
 };
 
 enum bpf_attach_type {
@@ -556,6 +557,12 @@ union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		cpu_plus;	/* run this program on cpu
+						 * (cpu_plus - 1).
+						 * If cpu_plus == 0, run on
+						 * current cpu. Only valid
+						 * for BPF_PROG_TYPE_USER.
+						 */
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -4441,4 +4448,16 @@ struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+struct pt_regs;
+
+#define BPF_USER_PROG_MAX_ARGS 5
+struct bpf_user_prog_args {
+	__u64 args[BPF_USER_PROG_MAX_ARGS];
+};
+
+struct bpf_user_prog_ctx {
+	struct pt_regs *regs;
+	__u64 args[BPF_USER_PROG_MAX_ARGS];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cd3d599e9e90e..f5a28fd8a9bc2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2078,6 +2078,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 	case BPF_PROG_TYPE_LSM:
 	case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
 	case BPF_PROG_TYPE_EXT: /* extends any prog */
+	case BPF_PROG_TYPE_USER:
 		return true;
 	default:
 		return false;
@@ -2969,7 +2970,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	}
 }
 
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu_plus
 
 static int bpf_prog_test_run(const union bpf_attr *attr,
 			     union bpf_attr __user *uattr)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cb91ef902cc43..cbe789bc1b986 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -16,6 +16,7 @@
 #include <linux/error-injection.h>
 #include <linux/btf_ids.h>
 
+#include <asm/irq_regs.h>
 #include <asm/tlb.h>
 
 #include "trace_probe.h"
@@ -1740,6 +1741,126 @@ const struct bpf_verifier_ops perf_event_verifier_ops = {
 const struct bpf_prog_ops perf_event_prog_ops = {
 };
 
+struct bpf_user_prog_test_run_info {
+	struct bpf_prog *prog;
+	struct bpf_user_prog_ctx ctx;
+	u32 retval;
+};
+
+static void
+__bpf_prog_test_run_user(struct bpf_user_prog_test_run_info *info)
+{
+	rcu_read_lock();
+	migrate_disable();
+	info->retval = BPF_PROG_RUN(info->prog, &info->ctx);
+	migrate_enable();
+	rcu_read_unlock();
+}
+
+static void _bpf_prog_test_run_user(void *data)
+{
+	struct bpf_user_prog_test_run_info *info = data;
+
+	info->ctx.regs = get_irq_regs();
+	__bpf_prog_test_run_user(info);
+}
+
+static int bpf_prog_test_run_user(struct bpf_prog *prog,
+				  const union bpf_attr *kattr,
+				  union bpf_attr __user *uattr)
+{
+	void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+	__u32 data_size = kattr->test.data_size_in;
+	struct bpf_user_prog_test_run_info info;
+	int cpu = kattr->test.cpu_plus - 1;
+	int err;
+
+	if (kattr->test.ctx_in || kattr->test.ctx_out ||
+	    kattr->test.duration || kattr->test.repeat ||
+	    kattr->test.data_out)
+		return -EINVAL;
+
+	if ((data_in && !data_size) || (!data_in && data_size))
+		return -EINVAL;
+
+	/* if provided, data_in should be struct bpf_user_prog_args */
+	if (data_size > 0 && data_size != sizeof(struct bpf_user_prog_args))
+		return -EINVAL;
+
+	if (kattr->test.data_size_in) {
+		if (copy_from_user(&info.ctx.args, data_in,
+				   sizeof(struct bpf_user_prog_args)))
+			return -EFAULT;
+	} else {
+		memset(&info.ctx.args, 0, sizeof(struct bpf_user_prog_args));
+	}
+
+	info.prog = prog;
+
+	if (!kattr->test.cpu_plus || cpu == smp_processor_id()) {
+		/* non-IPI, use regs from perf_fetch_caller_regs */
+		info.ctx.regs = get_bpf_raw_tp_regs();
+		if (IS_ERR(info.ctx.regs))
+			return PTR_ERR(info.ctx.regs);
+		perf_fetch_caller_regs(info.ctx.regs);
+		__bpf_prog_test_run_user(&info);
+		put_bpf_raw_tp_regs();
+	} else {
+		err = smp_call_function_single(cpu, _bpf_prog_test_run_user,
+					       &info, 1);
+		if (err)
+			return err;
+	}
+
+	if (copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static bool user_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				      const struct bpf_prog *prog,
+				      struct bpf_insn_access_aux *info)
+{
+	const int size_u64 = sizeof(u64);
+
+	if (off < 0 || off >= sizeof(struct bpf_user_prog_ctx))
+		return false;
+
+	switch (off) {
+	case bpf_ctx_range(struct bpf_user_prog_ctx, regs):
+		bpf_ctx_record_field_size(info, size_u64);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+			return false;
+		break;
+	default:
+		break;
+	}
+	return true;
+}
+
+static const struct bpf_func_proto *
+user_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_get_stackid:
+		return &bpf_get_stackid_proto;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto;
+	default:
+		return bpf_tracing_func_proto(func_id, prog);
+	}
+}
+
+const struct bpf_verifier_ops user_verifier_ops = {
+	.get_func_proto		= user_prog_func_proto,
+	.is_valid_access	= user_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops user_prog_ops = {
+	.test_run	= bpf_prog_test_run_user,
+};
+
 static DEFINE_MUTEX(bpf_event_mutex);
 
 #define BPF_TRACE_MAX_PROGS 64
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index eb5e0c38eb2cf..f6b9d4e7eeb4e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -190,6 +190,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_USER,
 };
 
 enum bpf_attach_type {
@@ -556,6 +557,12 @@ union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		cpu_plus;	/* run this program on cpu
+						 * (cpu_plus - 1).
+						 * If cpu_plus == 0, run on
+						 * current cpu. Only valid
+						 * for BPF_PROG_TYPE_USER.
+						 */
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -4441,4 +4448,16 @@ struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+struct pt_regs;
+
+#define BPF_USER_PROG_MAX_ARGS 5
+struct bpf_user_prog_args {
+	__u64 args[BPF_USER_PROG_MAX_ARGS];
+};
+
+struct bpf_user_prog_ctx {
+	struct pt_regs *regs;
+	__u64 args[BPF_USER_PROG_MAX_ARGS];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
2.24.1