Message-ID: <SJ0PR04MB7248C599DE6F006F94997CF180C39@SJ0PR04MB7248.namprd04.prod.outlook.com>
Date: Sat, 14 Jan 2023 15:53:22 +0800
From: Roland <kernel.pwn@...look.com>
To: mingo@...hat.com
Cc: peterz@...radead.org, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
bristot@...hat.com, vschneid@...hat.com, ast@...nel.org,
daniel@...earbox.net, andrii@...nel.org, martin.lau@...ux.dev,
song@...nel.org, yhs@...com, john.fastabend@...il.com,
kpsingh@...nel.org, sdf@...gle.com, haoluo@...gle.com,
jolsa@...nel.org, mhiramat@...nel.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org,
kernel.pwn@...look.com
Subject: [PATCH] bpf: Add CONFIG_BPF_HELPER_STRICT
In container environments, eBPF helpers can be abused to leak kernel
information, cause denial of service, or even escape from the container.
CONFIG_BPF_HELPER_STRICT mitigates this by gating bpf_probe_read(),
bpf_probe_write_user(), bpf_send_signal() and bpf_override_return() on a
per-task bitfield, which is set via the new BPF_HELPER_BITS_SET command
and inherited across fork only with CLONE_BITFIELD or when the forking
task has both CAP_BPF and CAP_SYS_ADMIN.
Link: https://rolandorange.zone/report.html
Signed-off-by: Roland <kernel.pwn@...look.com>
---
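Notes for reviewers: below is a minimal userspace sketch of opting a task
into a subset of helpers through the new command. It is an illustration
under two assumptions: headers regenerated from this patch, and the new
attr field being visible to userspace (as posted, security_helper_bits is
guarded by CONFIG_BPF_HELPER_STRICT inside the UAPI header, which
userspace builds do not define). The BPF_*_BIT positions live in a
non-UAPI header, so they are mirrored by hand here.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Bit positions mirrored from include/linux/kernel.h (not UAPI). */
#define BPF_PROBE_WRITE_BIT	1
#define BPF_PROBE_READ_BIT	2
#define BPF_SEND_SIGNAL_BIT	3
#define BPF_OVERRIDE_RETURN_BIT	4

int main(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	/* Allow bpf_probe_read() and bpf_send_signal() for this task;
	 * bpf_probe_write_user() and bpf_override_return() stay denied.
	 */
	attr.security_helper_bits = (1U << BPF_PROBE_READ_BIT) |
				    (1U << BPF_SEND_SIGNAL_BIT);

	if (syscall(__NR_bpf, BPF_HELPER_BITS_SET, &attr, sizeof(attr)) < 0) {
		perror("bpf(BPF_HELPER_BITS_SET)");
		return 1;
	}
	return 0;
}

On the kernel side this is handled by bpf_set_security_helper_bits() via
the new BPF_HELPER_BITS_SET case in __sys_bpf(); like BPF_MAP_CREATE and
BPF_PROG_LOAD, the command is refused for unprivileged callers when
unprivileged BPF is disabled.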
include/linux/kernel.h | 7 ++++++
include/linux/sched.h | 4 +++-
include/uapi/linux/bpf.h | 4 ++++
include/uapi/linux/sched.h | 4 ++++
kernel/bpf/Kconfig | 6 +++++
kernel/bpf/syscall.c | 48 +++++++++++++++++++++++++++++++++++---
kernel/fork.c | 13 +++++++++++
kernel/trace/bpf_trace.c | 29 +++++++++++++++++++++--
8 files changed, 109 insertions(+), 6 deletions(-)
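The inheritance side as a sketch: a child keeps the parent's bitfield only
when CLONE_BITFIELD is passed, or when the forking task holds both CAP_BPF
and CAP_SYS_ADMIN (see the kernel/fork.c hunk below). One caveat to flag:
0x00000040 sits inside the legacy exit-signal byte, and this patch does
not special-case it in clone3's flag validation the way CLONE_NEWTIME is
special-cased, so the sketch assumes a follow-up permitting the flag
through clone3_args_valid().

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <linux/sched.h>	/* struct clone_args */

#ifndef CLONE_BITFIELD
#define CLONE_BITFIELD 0x00000040	/* from this patch */
#endif

int main(void)
{
	struct clone_args args;
	pid_t pid;

	memset(&args, 0, sizeof(args));
	args.flags = CLONE_BITFIELD;	/* child inherits bpf_helper_bitfield */
	args.exit_signal = SIGCHLD;

	pid = syscall(__NR_clone3, &args, sizeof(args));
	if (pid < 0) {
		perror("clone3");
		return 1;
	}
	if (pid == 0) {
		/* Child runs with the parent's helper bitfield. */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return 0;
}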
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fe6efb24d..61f7fcbc9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -509,3 +509,10 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
BUILD_BUG_ON_ZERO((perms) & 2) + \
(perms))
#endif
+
+#ifdef CONFIG_BPF_HELPER_STRICT
+# define BPF_PROBE_WRITE_BIT 1		/* gates bpf_probe_write_user() */
+# define BPF_PROBE_READ_BIT 2		/* gates bpf_probe_read() */
+# define BPF_SEND_SIGNAL_BIT 3		/* gates bpf_send_signal() */
+# define BPF_OVERRIDE_RETURN_BIT 4	/* gates bpf_override_return() */
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffb6eb55c..a3ec52056 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -760,7 +760,9 @@ struct task_struct {
/* Per task flags (PF_*), defined further below: */
unsigned int flags;
unsigned int ptrace;
-
+#ifdef CONFIG_BPF_HELPER_STRICT
+	unsigned int bpf_helper_bitfield;	/* allowed BPF helpers, see BPF_*_BIT */
+#endif
#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 51b9aa640..99a90d0f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -900,6 +900,7 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_HELPER_BITS_SET,
};
enum bpf_map_type {
@@ -1326,6 +1327,9 @@ union bpf_attr {
* to using 5 hash functions).
*/
__u64 map_extra;
+#ifdef CONFIG_BPF_HELPER_STRICT
+	__u32	security_helper_bits;	/* BPF_HELPER_BITS_SET */
+#endif
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ce..c2fd463be 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -43,6 +43,10 @@
*/
#define CLONE_NEWTIME 0x00000080 /* New time namespace */
+#ifdef CONFIG_BPF_HELPER_STRICT
+#define CLONE_BITFIELD	0x00000040 /* Child inherits bpf_helper_bitfield */
+#endif
+
#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 2dfe1079f..c2c2fcf76 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -99,4 +99,10 @@ config BPF_LSM
If you are unsure how to answer this question, answer N.
+config BPF_HELPER_STRICT
+	bool "Enable BPF helper check bits"
+	depends on BPF_SYSCALL
+	help
+	  Enable per-task check bits that restrict security-sensitive BPF helpers.
+
+	  BPF_HELPER_STRICT restricts the functionality of helpers. Currently
+	  it can be used to restrict bpf_override_return(), bpf_probe_read(),
+	  bpf_probe_write_user(), and bpf_send_signal().
endmenu # "BPF subsystem"
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7b373a5e8..2f05ea50f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -68,6 +68,45 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
#undef BPF_LINK_TYPE
};
+#ifdef CONFIG_BPF_HELPER_STRICT
+static __always_inline int HelperWrite(unsigned int bits)
+{
+	return (bits & (1U << BPF_PROBE_WRITE_BIT)) != 0;
+}
+static __always_inline int HelperRead(unsigned int bits)
+{
+	return (bits & (1U << BPF_PROBE_READ_BIT)) != 0;
+}
+static __always_inline int HelperSendSignal(unsigned int bits)
+{
+	return (bits & (1U << BPF_SEND_SIGNAL_BIT)) != 0;
+}
+static __always_inline int HelperOverrideReturn(unsigned int bits)
+{
+	return (bits & (1U << BPF_OVERRIDE_RETURN_BIT)) != 0;
+}
+static int bpf_set_security_helper_bits(union bpf_attr *attr)
+{
+	unsigned int bits_to_set = attr->security_helper_bits;
+	unsigned int expected_bpf_helper_bitfield = 0;
+
+	if (HelperWrite(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_PROBE_WRITE_BIT;
+	if (HelperRead(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_PROBE_READ_BIT;
+	if (HelperSendSignal(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_SEND_SIGNAL_BIT;
+	if (HelperOverrideReturn(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_OVERRIDE_RETURN_BIT;
+	current->bpf_helper_bitfield = expected_bpf_helper_bitfield;
+	return 0;
+}
+#else
+static int bpf_set_security_helper_bits(union bpf_attr *attr)
+{
+	return -EINVAL;
+}
+#endif
/*
* If we're handed a bigger struct than we know of, ensure all the unknown bits
* are 0 - i.e. new user-space does not rely on any kernel feature extensions
@@ -4913,7 +4952,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
union bpf_attr attr;
bool capable;
int err;
-
+
capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
/* Intent here is for unprivileged_bpf_disabled to block key object
@@ -4925,7 +4964,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
* and other operations.
*/
if (!capable &&
- (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
+ (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD || cmd == BPF_HELPER_BITS_SET))
return -EPERM;
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
@@ -4938,7 +4977,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;
- err = security_bpf(cmd, &attr, size);
+ err = security_bpf(cmd, &attr, size);
if (err < 0)
return err;
@@ -5056,6 +5095,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
+ case BPF_HELPER_BITS_SET:
+ err = bpf_set_security_helper_bits(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa..6168da0b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1998,6 +1998,10 @@ static __latent_entropy struct task_struct *copy_process(
const u64 clone_flags = args->flags;
struct nsproxy *nsp = current->nsproxy;
+#ifdef CONFIG_BPF_HELPER_STRICT
+ unsigned int bitfield = current->bpf_helper_bitfield;
+#endif
+
/*
* Don't allow sharing the root directory with processes in a different
* namespace
@@ -2102,6 +2106,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
+
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
@@ -2490,6 +2495,14 @@ static __latent_entropy struct task_struct *copy_process(
copy_oom_score_adj(clone_flags, p);
+#ifdef CONFIG_BPF_HELPER_STRICT
+	/* Set the bitfield only if CLONE_BITFIELD was explicitly
+	 * requested, or if the forking task has both CAP_BPF and
+	 * CAP_SYS_ADMIN.
+	 */
+ p->bpf_helper_bitfield = (clone_flags & CLONE_BITFIELD) ? bitfield : 0;
+ if (capable(CAP_BPF) && capable(CAP_SYS_ADMIN))
+ p->bpf_helper_bitfield = bitfield;
+#endif
return p;
bad_fork_cancel_cgroup:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1ed08967f..5c4232d45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -39,6 +39,16 @@
#define bpf_event_rcu_dereference(p) \
rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+#ifdef CONFIG_BPF_HELPER_STRICT
+static bool check_bpf_bitfield(unsigned int bit)
+{
+	unsigned int bits = current->bpf_helper_bitfield;
+
+	if (!(bits & (1U << bit)))
+		return false;
+
+	return true;
+}
+#endif
+
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@@ -145,6 +155,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_OVERRIDE_RETURN_BIT)))
+ return -EPERM;
+#endif
regs_set_return_value(regs, rc);
override_function_with_return(regs);
return 0;
@@ -162,8 +176,8 @@ static const struct bpf_func_proto bpf_override_return_proto = {
static __always_inline int
bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
- int ret;
+ int ret;
ret = copy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
@@ -287,6 +301,10 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
const void *, unsafe_ptr)
{
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_PROBE_READ_BIT)))
+ return -EPERM;
+#endif
if ((unsigned long)unsafe_ptr < TASK_SIZE) {
return bpf_probe_read_user_common(dst, size,
(__force void __user *)unsafe_ptr);
@@ -338,7 +356,10 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
* state, when the task or mm are switched. This is specifically
* required to prevent the use of temporary mm.
*/
-
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_PROBE_WRITE_BIT)))
+ return -EPERM;
+#endif
if (unlikely(in_interrupt() ||
current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
@@ -843,6 +864,10 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
* permitted in order to send signal to the current
* task.
*/
+#ifdef CONFIG_BPF_HELPER_STRICT
+ if (unlikely(!check_bpf_bitfield(BPF_SEND_SIGNAL_BIT)))
+ return -EPERM;
+#endif
if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
--
2.25.1