lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 20 May 2020 18:26:42 -0400
From:   "Joel Fernandes (Google)" <joel@...lfernandes.org>
To:     Nishanth Aravamudan <naravamudan@...italocean.com>,
        Julien Desfossez <jdesfossez@...italocean.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Tim Chen <tim.c.chen@...ux.intel.com>, mingo@...nel.org,
        tglx@...utronix.de, pjt@...gle.com, torvalds@...ux-foundation.org
Cc:     "Joel Fernandes (Google)" <joel@...lfernandes.org>,
        vpillai <vpillai@...italocean.com>, linux-kernel@...r.kernel.org,
        fweisbec@...il.com, keescook@...omium.org, kerrnel@...gle.com,
        Phil Auld <pauld@...hat.com>, Aaron Lu <aaron.lwe@...il.com>,
        Aubrey Li <aubrey.intel@...il.com>, aubrey.li@...ux.intel.com,
        Valentin Schneider <valentin.schneider@....com>,
        Mel Gorman <mgorman@...hsingularity.net>,
        Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Joel Fernandes <joelaf@...gle.com>
Subject: [PATCH RFC] sched: Add a per-thread core scheduling interface

Add a per-thread core scheduling interface which allows a thread to tag
itself and enable core scheduling. Based on discussion at OSPM with
maintainers, we propose a prctl(2) interface accepting values of 0 or 1.
 1 - enable core scheduling for the task.
 0 - disable core scheduling for the task.

Special cases:
(1)
The core-scheduling patchset contains a CGroup interface as well. In
order for us to respect users of that interface, we avoid overriding the
tag if a task was CGroup-tagged because the task becomes inconsistent
with the CGroup tag. Instead return -EBUSY.

(2)
If a task is prctl-tagged, allow the CGroup interface to override
the task's tag.

ChromeOS will use core-scheduling to securely enable hyperthreading.
This cuts down the keypress latency in Google docs from 150ms to 50ms
while improving the camera streaming frame rate by ~3%.

Signed-off-by: Joel Fernandes (Google) <joel@...lfernandes.org>
---
 include/linux/sched.h      |  6 ++++
 include/uapi/linux/prctl.h |  3 ++
 kernel/sched/core.c        | 57 ++++++++++++++++++++++++++++++++++++++
 kernel/sys.c               |  3 ++
 4 files changed, 69 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6ae59fcadbe..8a40a093aa2ca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1986,6 +1986,12 @@ static inline void rseq_execve(struct task_struct *t)
 
 #endif
 
+#ifdef CONFIG_SCHED_CORE
+int task_set_core_sched(int set, struct task_struct *tsk);
+#else
+int task_set_core_sched(int set, struct task_struct *tsk) { return -ENOTSUPP; }
+#endif
+
 void __exit_umh(struct task_struct *tsk);
 
 static inline void exit_umh(struct task_struct *tsk)
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e362..dba0c70f9cce6 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,4 +238,7 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER		57
 #define PR_GET_IO_FLUSHER		58
 
+/* Core scheduling per-task interface */
+#define PR_SET_CORE_SCHED		59
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 684359ff357e7..780514d03da47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3320,6 +3320,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 #ifdef CONFIG_SCHED_CORE
 	RB_CLEAR_NODE(&p->core_node);
+
+	/*
+	 * If task is using prctl(2) for tagging, do the prctl(2)-style tagging
+	 * for the child as well.
+	 */
+	if (current->core_cookie && ((unsigned long)current == current->core_cookie))
+		task_set_core_sched(1, p);
 #endif
 	return 0;
 }
@@ -7857,6 +7864,56 @@ void __cant_sleep(const char *file, int line, int preempt_offset)
 EXPORT_SYMBOL_GPL(__cant_sleep);
 #endif
 
+#ifdef CONFIG_SCHED_CORE
+
+/* Ensure that all siblings have rescheduled once */
+static int task_set_core_sched_stopper(void *data)
+{
+	return 0;
+}
+
+int task_set_core_sched(int set, struct task_struct *tsk)
+{
+	if (!tsk)
+		tsk = current;
+
+	if (set > 1)
+		return -ERANGE;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -EINVAL;
+
+	/*
+	 * If cookie was set previously, return -EBUSY if either of the
+	 * following are true:
+	 * 1. Task was previously tagged by CGroup method.
+	 * 2. Task or its parent were tagged by prctl().
+	 *
+	 * Note that, if CGroup tagging is done after prctl(), then that would
+	 * override the cookie. However, if prctl() is done after task was
+	 * added to tagged CGroup, then the prctl() returns -EBUSY.
+	 */
+	if (!!tsk->core_cookie == set) {
+		if ((tsk->core_cookie == (unsigned long)tsk) ||
+		    (tsk->core_cookie == (unsigned long)tsk->sched_task_group)) {
+			return -EBUSY;
+		}
+	}
+
+	if (set)
+		sched_core_get();
+
+	tsk->core_cookie = set ? (unsigned long)tsk : 0;
+
+	stop_machine(task_set_core_sched_stopper, NULL, NULL);
+
+	if (!set)
+		sched_core_put();
+
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MAGIC_SYSRQ
 void normalize_rt_tasks(void)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a9..5c3bcf40dcb34 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2514,6 +2514,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 
 		error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
 		break;
+	case PR_SET_CORE_SCHED:
+		error = task_set_core_sched(arg2, NULL);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.26.2.761.g0e0b3e54be-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ