Date:   Thu, 27 Jan 2022 16:37:49 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Peter Oskolkov <posk@...gle.com>
Cc:     mingo@...hat.com, tglx@...utronix.de, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, linux-api@...r.kernel.org, x86@...nel.org,
        pjt@...gle.com, avagin@...gle.com, jannh@...gle.com,
        tdelisle@...terloo.ca, posk@...k.io
Subject: Re: [RFC PATCH v2 4/5] sched: UMCG: add a blocked worker list

On Thu, Jan 13, 2022 at 03:39:39PM -0800, Peter Oskolkov wrote:

> This change introduces the following benefits:
> - block detection now behaves similarly to wake detection;
>   without this patch, worker wakeups added wakees to the list
>   and woke the server, while worker blocks only woke the server
>   without adding blocked workers to a list, forcing servers
>   to explicitly check the worker's state;

> - if the blocked worker woke sufficiently quickly, the server
>   woken on the block event would observe its worker now as
>   RUNNABLE, so the block event had to be inferred rather than
>   explicitly signalled by the worker being added to the blocked
>   worker list;

I think this misses the point: there is no race if the server checks
curr->state == RUNNING.
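
Concretely, something like this on the server side (a sketch; the
state word and constant names, state / UMCG_TASK_STATE_MASK /
UMCG_TASK_RUNNING, are assumed from the series' uapi header and do
not appear in the hunks below):

static inline int umcg_worker_running(const struct umcg_task *curr)
{
	/* Acquire-load; pairs with the kernel publishing the new state. */
	__u64 state = __atomic_load_n(&curr->state, __ATOMIC_ACQUIRE);

	return (state & UMCG_TASK_STATE_MASK) == UMCG_TASK_RUNNING;
}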

> - it is now possible for a single server to control several
>   RUNNING workers, which makes writing userspace schedulers
>   simpler for smaller processes that do not need to scale beyond
>   one "server";
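
With MULTI (below), the server's side of that list could look like so
(a sketch; it assumes the kernel pushes blocked workers at the head
and links them through each worker's own blocked_workers_ptr,
mirroring the runnable list):

static void umcg_drain_blocked(struct umcg_task *server)
{
	/* Detach the whole list atomically; the kernel pushes at the head. */
	__u64 head = __atomic_exchange_n(&server->blocked_workers_ptr,
					 0ULL, __ATOMIC_ACQ_REL);

	while (head) {
		struct umcg_task *w = (struct umcg_task *)(unsigned long)head;

		head = w->blocked_workers_ptr;
		w->blocked_workers_ptr = 0;
		/* Mark @w blocked in the userspace scheduler here. */
	}
}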

How about something like so on top?

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1298,6 +1298,7 @@ struct task_struct {
 
 #ifdef CONFIG_UMCG
 	/* setup by sys_umcg_ctrl() */
+	u32			umcg_flags;
 	clockid_t		umcg_clock;
 	struct umcg_task __user	*umcg_task;
 
--- a/include/uapi/linux/umcg.h
+++ b/include/uapi/linux/umcg.h
@@ -119,6 +119,8 @@ struct umcg_task {
 	 *
 	 * Readable/writable by both the kernel and the userspace: the
 	 * kernel adds items to the list, userspace removes them.
+	 *
+	 * Only used with UMCG_CTL_MULTI.
 	 */
 	__u64	blocked_workers_ptr;		/* r/w */
 
@@ -147,11 +149,13 @@ enum umcg_wait_flag {
  * @UMCG_CTL_REGISTER:   register the current task as a UMCG task
  * @UMCG_CTL_UNREGISTER: unregister the current task as a UMCG task
  * @UMCG_CTL_WORKER:     register the current task as a UMCG worker
+ * @UMCG_CTL_MULTI:      allow 1:n worker relations, enables blocked_workers_ptr
  */
 enum umcg_ctl_flag {
 	UMCG_CTL_REGISTER	= 0x00001,
 	UMCG_CTL_UNREGISTER	= 0x00002,
 	UMCG_CTL_WORKER		= 0x10000,
+	UMCG_CTL_MULTI		= 0x20000,
 };
 
 #endif /* _UAPI_LINUX_UMCG_H */
--- a/kernel/sched/umcg.c
+++ b/kernel/sched/umcg.c
@@ -335,7 +335,7 @@ static inline int umcg_enqueue_runnable(
 }
 
 /*
- * Enqueue @tsk on it's server's blocked list
+ * Enqueue @tsk on its server's blocked list OR ensure @tsk == server::next_tid
  *
  * Must be called in umcg_pin_pages() context, relies on tsk->umcg_server.
  *
@@ -346,10 +346,34 @@ static inline int umcg_enqueue_runnable(
  * Returns:
  *   0: success
  *   -EFAULT
+ *   -ESRCH	server::next_tid is not a valid UMCG task
+ *   -EINVAL	server::next_tid doesn't match @tsk
  */
 static inline int umcg_enqueue_blocked(struct task_struct *tsk)
 {
-	return umcg_enqueue(tsk, true /* blocked */);
+	struct task_struct *next;
+	u32 next_tid;
+	int ret;
+
+	if (tsk->umcg_server->umcg_flags & UMCG_CTL_MULTI)
+		return umcg_enqueue(tsk, true /* blocked */);
+
+	/*
+	 * When !MULTI, verify this worker is the server's current
+	 * worker (server::next_tid), preserving the 1:1 relation.
+	 */
+	if (get_user(next_tid, &tsk->umcg_server_task->next_tid))
+		return -EFAULT;
+
+	next = umcg_get_task(next_tid);
+	if (!next)
+		return -ESRCH;
+
+	ret = (next == tsk) ? 0 : -EINVAL;
+
+	put_task_struct(next);
+
+	return ret;
 }
 
 /* pre-schedule() */
@@ -911,6 +934,8 @@ static int umcg_register(struct umcg_tas
 		return -EINVAL;
 	}
 
 	if (current->umcg_task || !self)
 		return -EINVAL;
 
+	current->umcg_flags = flags;
+
@@ -1061,7 +1086,7 @@ SYSCALL_DEFINE3(umcg_ctl, u32, flags, st
 
 	flags &= ~UMCG_CTL_CMD;
 
-	if (flags & ~(UMCG_CTL_WORKER))
+	if (flags & ~(UMCG_CTL_WORKER|UMCG_CTL_MULTI))
 		return -EINVAL;
 
 	switch (cmd) {

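Registering a 1:n server from userspace would then be something like
so (a sketch; __NR_umcg_ctl and the (flags, self, clockid) argument
order are assumed from the sys_umcg_ctl() hunk above):

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <time.h>
#include <linux/umcg.h>

static int umcg_register_multi_server(struct umcg_task *self)
{
	/* self: the server's struct umcg_task, shared with the kernel. */
	return syscall(__NR_umcg_ctl,
		       UMCG_CTL_REGISTER | UMCG_CTL_MULTI,
		       self, CLOCK_MONOTONIC);
}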