lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <9cd9abad06ad8c3f35228afd07c74c7d9533c412.1598643276.git.jdesfossez@digitalocean.com>
Date:   Fri, 28 Aug 2020 15:51:21 -0400
From:   Julien Desfossez <jdesfossez@...italocean.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Vineeth Pillai <viremana@...ux.microsoft.com>,
        Joel Fernandes <joelaf@...gle.com>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        Aaron Lu <aaron.lwe@...il.com>,
        Aubrey Li <aubrey.intel@...il.com>,
        Dhaval Giani <dhaval.giani@...cle.com>,
        Chris Hyser <chris.hyser@...cle.com>,
        Nishanth Aravamudan <naravamudan@...italocean.com>
Cc:     mingo@...nel.org, tglx@...utronix.de, pjt@...gle.com,
        torvalds@...ux-foundation.org, linux-kernel@...r.kernel.org,
        fweisbec@...il.com, keescook@...omium.org, kerrnel@...gle.com,
        Phil Auld <pauld@...hat.com>,
        Valentin Schneider <valentin.schneider@....com>,
        Mel Gorman <mgorman@...hsingularity.net>,
        Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
        Paolo Bonzini <pbonzini@...hat.com>, joel@...lfernandes.org,
        vineeth@...byteword.org, Chen Yu <yu.c.chen@...el.com>,
        Christian Brauner <christian.brauner@...ntu.com>,
        Agata Gruza <agata.gruza@...el.com>,
        Antonio Gomez Iglesias <antonio.gomez.iglesias@...el.com>,
        graf@...zon.com, konrad.wilk@...cle.com, dfaggioli@...e.com,
        rostedt@...dmis.org, derkling@...gle.com, benbjiang@...cent.com
Subject: [RFC PATCH v7 20/23] sched/coresched: config option for kernel protection

From: Vineeth Pillai <viremana@...ux.microsoft.com>

There are use cases where the kernel protection is not needed. One
example could be about using core scheduling for non-security related
use cases - isolate core for a particular process dynamically. Also,
to test/benchmark the overhead of kernel protection.

Have a compile time and boot time option to disable the feature.
CONFIG_SCHED_CORE_KERNEL_PROTECTION will enable this feature at
compile time and is enabled by default is CONFIG_SCHED_CORE=y.
sched_core_kernel_protection= boot time option to control this. Value
0 will disable the feature.

Signed-off-by: Vineeth Pillai <viremana@...ux.microsoft.com>
---
 .../admin-guide/kernel-parameters.txt         |  9 +++++
 include/linux/sched.h                         |  2 +-
 kernel/Kconfig.preempt                        | 13 +++++++
 kernel/sched/core.c                           | 39 ++++++++++++++++++-
 kernel/sched/sched.h                          |  2 +
 5 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..01e442388e4a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4611,6 +4611,15 @@
 
 	sbni=		[NET] Granch SBNI12 leased line adapter
 
+	sched_core_kernel_protection=
+			[SCHED_CORE, SCHED_CORE_IRQ_PAUSE] Pause SMT siblings
+			of a core runninig in user mode if atleast one of the
+			siblings of the core is running in kernel. This is to
+			guarantee that kernel data is not leaked to tasks which
+			are not trusted by the kernel.
+			This feature is valid only when Core scheduling is
+			enabled(CONFIG_SCHED_CORE).
+
 	sched_debug	[KNL] Enables verbose scheduler debug messages.
 
 	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e04ffe689cb..4d9ae6b4dcc9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2055,7 +2055,7 @@ int sched_trace_rq_nr_running(struct rq *rq);
 
 const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 
-#ifdef CONFIG_SCHED_CORE
+#ifdef CONFIG_SCHED_CORE_KERNEL_PROTECTION
 void sched_core_unsafe_enter(void);
 void sched_core_unsafe_exit(void);
 void sched_core_unsafe_exit_wait(unsigned long ti_check);
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 4488fbf4d3a8..52f86739f910 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -86,3 +86,16 @@ config SCHED_CORE
 	default y
 	depends on SCHED_SMT
 
+config SCHED_CORE_KERNEL_PROTECTION
+	bool "Core Scheduling for SMT"
+	default y
+	depends on SCHED_CORE
+	help
+	  This option enables pausing all SMT siblings of a core running in
+	  user mode when atleast one of the siblings in the core is in kernel.
+	  This is to enforce security such that information from kernel is not
+	  leaked to non-trusted tasks running on siblings. This option is valid
+	  only if Core Scheduling(CONFIG_SCHED_CORE) is enabled.
+
+	  If in doubt, select 'Y' when CONFIG_SCHED_CORE=y
+
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0dc9172be04d..34238fd67f31 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -75,6 +75,24 @@ __read_mostly int scheduler_running;
 
 #ifdef CONFIG_SCHED_CORE
 
+#ifdef CONFIG_SCHED_CORE_KERNEL_PROTECTION
+
+DEFINE_STATIC_KEY_TRUE(sched_core_kernel_protection);
+static int __init set_sched_core_kernel_protection(char *str)
+{
+	unsigned long val = 0;
+
+	if (!str)
+		return 0;
+
+	if (!kstrtoul(str, 0, &val) && !val)
+		static_branch_disable(&sched_core_kernel_protection);
+
+	return 1;
+}
+__setup("sched_core_kernel_protection=", set_sched_core_kernel_protection);
+#endif
+
 DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
 
 /* kernel prio, less is more */
@@ -4600,6 +4618,8 @@ static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
 	return a->core_cookie == b->core_cookie;
 }
 
+#ifdef CONFIG_SCHED_CORE_KERNEL_PROTECTION
+
 /*
  * Handler to attempt to enter kernel. It does nothing because the exit to
  * usermode or guest mode will do the actual work (of waiting if needed).
@@ -4609,6 +4629,11 @@ static void sched_core_irq_work(struct irq_work *work)
 	return;
 }
 
+static inline void init_sched_core_irq_work(struct rq *rq)
+{
+	init_irq_work(&rq->core_irq_work, sched_core_irq_work);
+}
+
 /*
  * sched_core_wait_till_safe - Pause the caller's hyperthread until the core
  * exits the core-wide unsafe state. Obviously the CPU calling this function
@@ -4684,6 +4709,9 @@ void sched_core_unsafe_enter(void)
 	struct rq *rq;
 	int i, cpu;
 
+	if (!static_branch_likely(&sched_core_kernel_protection))
+		return;
+
 	/* Ensure that on return to user/guest, we check whether to wait. */
 	if (current->core_cookie)
 		set_tsk_thread_flag(current, TIF_UNSAFE_RET);
@@ -4769,6 +4797,9 @@ void sched_core_unsafe_exit(void)
 	struct rq *rq;
 	int cpu;
 
+	if (!static_branch_likely(&sched_core_kernel_protection))
+		return;
+
 	local_irq_save(flags);
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
@@ -4807,9 +4838,15 @@ void sched_core_unsafe_exit(void)
 
 void sched_core_unsafe_exit_wait(unsigned long ti_check)
 {
+	if (!static_branch_likely(&sched_core_kernel_protection))
+		return;
+
 	sched_core_unsafe_exit();
 	sched_core_wait_till_safe(ti_check);
 }
+#else
+static inline void init_sched_core_irq_work(struct rq *rq) {}
+#endif /* CONFIG_SCHED_CORE_KERNEL_PROTECTION */
 
 // XXX fairness/fwd progress conditions
 /*
@@ -7795,7 +7832,7 @@ int sched_cpu_starting(unsigned int cpu)
 			rq = cpu_rq(i);
 			if (rq->core && rq->core == rq)
 				core_rq = rq;
-			init_irq_work(&rq->core_irq_work, sched_core_irq_work);
+			init_sched_core_irq_work(rq);
 		}
 
 		if (!core_rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dbd8416ddaba..676818bdb9df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1058,8 +1058,10 @@ struct rq {
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
 	unsigned char		core_forceidle;
+#ifdef CONFIG_SCHED_CORE_KERNEL_PROTECTION
 	struct irq_work		core_irq_work; /* To force HT into kernel */
 	unsigned int		core_this_unsafe_nest;
+#endif
 
 	/* shared state */
 	unsigned int		core_task_seq;
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ