Message-ID: <5f48073d-8b4e-4569-af4f-3a9b5586d7ad@redhat.com>
Date: Fri, 20 Sep 2024 03:05:57 -0400
From: Waiman Long <longman@...hat.com>
To: Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
Andy Lutomirski <luto@...nel.org>, Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
David Kaplan <David.Kaplan@....com>,
Daniel Sneddon <daniel.sneddon@...ux.intel.com>, x86@...nel.org,
"H. Peter Anvin" <hpa@...or.com>, Peter Zijlstra <peterz@...radead.org>,
Josh Poimboeuf <jpoimboe@...nel.org>, Steven Rostedt <rostedt@...dmis.org>
Cc: linux-kernel@...r.kernel.org, cgroups@...r.kernel.org
Subject: Re: [PATCH RFC 2/2] cpu/bugs: cgroup: Add a cgroup knob to bypass CPU
mitigations
On 9/19/24 17:52, Pawan Gupta wrote:
> For cases where an admin wants to bypass CPU mitigations for a specific
> workload that they trust, add a cgroup attribute "cpu.skip_mitigation" that
> can only be set by a privileged user. When set, the CPU mitigations are
> bypassed for all tasks in that cgroup.
>
> Before setting this knob, the admin should be aware of possible security
> risks like confused deputy attacks on trusted interpreters, JIT engines,
> etc.
>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>
> ---
> arch/x86/include/asm/switch_to.h | 10 ++++++++++
> arch/x86/kernel/cpu/bugs.c | 21 ++++++++++++++++++++
> include/linux/cgroup-defs.h | 3 +++
> kernel/cgroup/cgroup.c | 42 ++++++++++++++++++++++++++++++++++++++++
> kernel/sched/core.c | 2 +-
> 5 files changed, 77 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
> index c3bd0c0758c9..7f32fd139644 100644
> --- a/arch/x86/include/asm/switch_to.h
> +++ b/arch/x86/include/asm/switch_to.h
> @@ -46,6 +46,16 @@ struct fork_frame {
> struct pt_regs regs;
> };
>
> +extern inline void cpu_mitigation_skip(struct task_struct *prev, struct task_struct *next);
> +
> +#define prepare_arch_switch prepare_arch_switch
> +
> +static inline void prepare_arch_switch(struct task_struct *prev,
> + struct task_struct *next)
> +{
> + cpu_mitigation_skip(prev, next);
> +}
> +
> #define switch_to(prev, next, last) \
> do { \
> ((last) = __switch_to_asm((prev), (next))); \
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 45675da354f3..77eb4f6dc5c9 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -128,6 +128,27 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
> DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
> EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
>
> +inline void cpu_mitigation_skip(struct task_struct *prev,
> + struct task_struct *next)
> +{
> + bool prev_skip = false, next_skip = false;
> +
> + if (prev->mm)
> + prev_skip = task_dfl_cgroup(prev)->cpu_skip_mitigation;
> + if (next->mm)
> + next_skip = task_dfl_cgroup(next)->cpu_skip_mitigation;
> +
> + if (!prev_skip && !next_skip)
> + return;
> + if (prev_skip == next_skip)
> + return;
I believe the first (!prev_skip && !next_skip) check is redundant.
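I.e. the (prev_skip == next_skip) test already covers the case where both
are false, so (untested, just to illustrate) the body could shrink to:

	if (prev->mm)
		prev_skip = task_dfl_cgroup(prev)->cpu_skip_mitigation;
	if (next->mm)
		next_skip = task_dfl_cgroup(next)->cpu_skip_mitigation;

	if (prev_skip == next_skip)
		return;

	wrmsrl(MSR_LSTAR, next_skip
			  ? (unsigned long)entry_SYSCALL_64_unmitigated
			  : (unsigned long)entry_SYSCALL_64_mitigated);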
> +
> + if (next_skip)
> + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64_unmitigated);
> + else
> + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64_mitigated);
> +}
> +
> void __init cpu_select_mitigations(void)
> {
> /*
> diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> index ae04035b6cbe..6a131a62f43c 100644
> --- a/include/linux/cgroup-defs.h
> +++ b/include/linux/cgroup-defs.h
> @@ -546,6 +546,9 @@ struct cgroup {
> struct bpf_local_storage __rcu *bpf_cgrp_storage;
> #endif
>
> + /* Used to bypass the CPU mitigations for tasks in a cgroup */
> + bool cpu_skip_mitigation;
> +
> /* All ancestors including self */
> struct cgroup *ancestors[];
> };
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index c8e4b62b436a..b745dbcb153e 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -2045,6 +2045,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
> cgrp->dom_cgrp = cgrp;
> cgrp->max_descendants = INT_MAX;
> cgrp->max_depth = INT_MAX;
> + cgrp->cpu_skip_mitigation = 0;
> INIT_LIST_HEAD(&cgrp->rstat_css_list);
> prev_cputime_init(&cgrp->prev_cputime);
>
> @@ -3751,6 +3752,41 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
> return ret;
> }
>
> +static int cpu_skip_mitigation_show(struct seq_file *seq, void *v)
> +{
> + struct cgroup *cgrp = seq_css(seq)->cgroup;
> + int ret = 0;
> +
> + seq_printf(seq, "%d\n", cgrp->cpu_skip_mitigation);
> +
> + return ret;
> +}
> +
> +static ssize_t cgroup_skip_mitigation_write(struct kernfs_open_file *of,
> + char *buf, size_t nbytes,
> + loff_t off)
> +{
> + struct cgroup *cgrp = of->kn->parent->priv;
> + struct cgroup_file_ctx *ctx = of->priv;
> + u64 skip_mitigation;
> + int ret;
> +
> + /* Only privileged user in init namespace is allowed to set skip_mitigation */
> + if ((ctx->ns != &init_cgroup_ns) || !capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + ret = kstrtoull(buf, 0, &skip_mitigation);
> + if (ret)
> + return -EINVAL;
> +
> + if (skip_mitigation > 1)
> + return -EINVAL;
> +
> + cgrp->cpu_skip_mitigation = skip_mitigation;
> +
> + return nbytes;
> +}
> +
> static int cpu_local_stat_show(struct seq_file *seq, void *v)
> {
> struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
> @@ -5290,6 +5326,12 @@ static struct cftype cgroup_base_files[] = {
> .name = "cpu.stat.local",
> .seq_show = cpu_local_stat_show,
> },
> + {
> + .name = "cpu.skip_mitigation",
> + .flags = CFTYPE_NOT_ON_ROOT,
> + .seq_show = cpu_skip_mitigation_show,
> + .write = cgroup_skip_mitigation_write,
> + },
> { } /* terminate */
> };
Since this control knob is effective only on x86_64, should we enable
it only for that architecture? However, cgroup has never had an
architecture-specific control knob like that before; this would be the
first if we decide to do that. See the sketch below.
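If we do go that way, a minimal (untested) sketch would be to compile the
entry in conditionally:

	#ifdef CONFIG_X86_64
		{
			.name = "cpu.skip_mitigation",
			.flags = CFTYPE_NOT_ON_ROOT,
			.seq_show = cpu_skip_mitigation_show,
			.write = cgroup_skip_mitigation_write,
		},
	#endif

The show/write handlers and the cpu_skip_mitigation field would need the
same guard.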
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index f3951e4a55e5..4b4109afbf7c 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4994,7 +4994,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
> fire_sched_out_preempt_notifiers(prev, next);
> kmap_local_sched_out();
> prepare_task(next);
> - prepare_arch_switch(next);
> + prepare_arch_switch(prev, next);
> }
>
> /**
>
The sparc and m68k arches define their own prepare_arch_switch(), so
you will need to modify those places as well.
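Note that the generic no-op fallback (in kernel/sched/sched.h, IIRC)
also takes a single argument, so the prototype change has to be
propagated there too, e.g. (untested):

	#ifndef prepare_arch_switch
	# define prepare_arch_switch(prev, next)	do { } while (0)
	#endif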
Cheers,
Longman