lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 25 Jan 2022 15:59:31 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     mingo@...hat.com, tglx@...utronix.de, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com
Cc:     linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-api@...r.kernel.org, x86@...nel.org, pjt@...gle.com,
        posk@...gle.com, avagin@...gle.com, jannh@...gle.com,
        tdelisle@...terloo.ca, mark.rutland@....com, posk@...k.io
Subject: Re: [RFC][PATCH v2 5/5] sched: User Mode Concurrency Groups

On Mon, Jan 24, 2022 at 03:29:56PM +0100, Peter Zijlstra wrote:

> Oh how I hate signals... this can get scribbled by a syscall/fault from
> sigcontext :/

OK, the below seems to work. I'll see if I can clean it up some.

--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -94,28 +94,44 @@ static inline int syscall_get_arch(struc
 
 #else	 /* CONFIG_X86_64 */
 
-static inline void syscall_get_arguments(struct task_struct *task,
-					 struct pt_regs *regs,
-					 unsigned long *args)
+static inline unsigned long
+syscall_get_argument(struct task_struct *task, struct pt_regs *regs, int nr)
 {
-# ifdef CONFIG_IA32_EMULATION
+#ifdef CONFIG_IA32_EMULATION
 	if (task->thread_info.status & TS_COMPAT) {
-		*args++ = regs->bx;
-		*args++ = regs->cx;
-		*args++ = regs->dx;
-		*args++ = regs->si;
-		*args++ = regs->di;
-		*args   = regs->bp;
+		switch (nr) {
+		case 0: return regs->bx;
+		case 1: return regs->cx;
+		case 2: return regs->dx;
+		case 3: return regs->si;
+		case 4: return regs->di;
+		case 5: return regs->bp;
+		}
 	} else
-# endif
+#endif
 	{
-		*args++ = regs->di;
-		*args++ = regs->si;
-		*args++ = regs->dx;
-		*args++ = regs->r10;
-		*args++ = regs->r8;
-		*args   = regs->r9;
+		switch (nr) {
+		case 0: return regs->di;
+		case 1: return regs->si;
+		case 2: return regs->dx;
+		case 3: return regs->r10;
+		case 4: return regs->r8;
+		case 5: return regs->r9;
+		}
 	}
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	int i;
+
+	for (i = 0; i < 6; i++)
+		*args++ = syscall_get_argument(task, regs, i);
 }
 
 static inline int syscall_get_arch(struct task_struct *task)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1307,6 +1307,9 @@ struct task_struct {
 	struct task_struct	*umcg_server;
 	struct umcg_task __user *umcg_server_task;
 	struct page		*umcg_server_page;
+
+	unsigned long		umcg_stack_pointer;
+	unsigned int		umcg_worker;
 #endif
 
 	struct tlbflush_unmap_batch	tlb_ubc;
--- a/kernel/sched/umcg.c
+++ b/kernel/sched/umcg.c
@@ -459,7 +459,7 @@ static int umcg_wait(u64 timo)
 /*
  * Blocked case for umcg_sys_exit(), shared with sys_umcg_ctl().
  */
-static void umcg_unblock_and_wait(void)
+static void umcg_unblock(void)
 {
 	struct task_struct *tsk = current;
 	struct umcg_task __user *self = READ_ONCE(tsk->umcg_task);
@@ -478,15 +478,7 @@ static void umcg_unblock_and_wait(void)
 
 	umcg_unpin_pages();
 
-	switch (umcg_wait(0)) {
-	case 0:
-	case -EINTR:
-		/* notify_resume will continue the wait after the signal */
-		break;
-
-	default:
-		UMCG_DIE("wait");
-	}
+	/* notify-resume will wait */
 
 	tsk->flags |= PF_UMCG_WORKER;
 }
@@ -509,7 +501,7 @@ void umcg_sys_exit(struct pt_regs *regs)
 		return;
 	}
 
-	umcg_unblock_and_wait();
+	umcg_unblock();
 }
 
 /* return-to-user path */
@@ -518,11 +510,47 @@ void umcg_notify_resume(struct pt_regs *
 	struct task_struct *tsk = current;
 	struct umcg_task __user *self = tsk->umcg_task;
 	bool worker = tsk->flags & PF_UMCG_WORKER;
+	u64 timeout = 0;
 	u32 state;
+	int ret;
+
+	/*
+	 * Unix signals are horrible, but we have to handle them somehow.
+	 *
+	 * - simply discarding a signal breaks userspace so is not an option.
+	 *
+	 * - returning -EINTR and having userspace deal with it is not an option
+	 *   since we can be blocked here due to !syscall reasons (page-faults
+	 *   for example). But it's also not permissible to have random
+	 *   syscalls return -EINTR that didn't before.
+	 *
+	 * - subjecting signal handlers to UMCG would render existing signal
+	 *   handler code subject to the whims and latencies of UMCG; given that
+	 *   most signal handler code is short and time sensitive, this seems
+	 *   undesirable (consider ^C not working because it got delivered to a
+	 *   blocked task).
+	 *
+	 * Therefore the chosen path is to exclude signal context from UMCG
+	 * entirely and treat it as unmanaged time.
+	 */
+	if (tsk->umcg_stack_pointer) {
+		if (tsk->umcg_stack_pointer != user_stack_pointer(regs))
+			return;
+
+		tsk->umcg_stack_pointer = 0;
+		worker = tsk->umcg_worker;
+		tsk->umcg_worker = 0;
+
+		if (worker) {
+			set_syscall_work(SYSCALL_UMCG);
+			/* and PF_UMCG_WORKER at done */
+		}
+		goto resume;
+	}
 
 	/* avoid recursion vs schedule() */
 	if (worker)
-		current->flags &= ~PF_UMCG_WORKER;
+		tsk->flags &= ~PF_UMCG_WORKER;
 
 	if (get_user(state, &self->state))
 		UMCG_DIE("get-state");
@@ -554,10 +582,31 @@ void umcg_notify_resume(struct pt_regs *
 		umcg_unpin_pages();
 	}
 
-	switch (umcg_wait(0)) {
+resume:
+	/*
+	 * Hack alert! Since the return-to-user path must resume waiting, it
+	 * needs access to the timeout argument and must set the return value.
+	 */
+	if (syscall_get_nr(tsk, regs) == __NR_umcg_wait)
+		timeout = syscall_get_argument(tsk, regs, 1);
+
+	ret = umcg_wait(timeout);
+	switch (ret) {
 	case 0:
+		break;
+
 	case -EINTR:
 		/* we will resume the wait after the signal */
+		WARN_ON_ONCE(tsk->umcg_stack_pointer);
+		tsk->umcg_stack_pointer = user_stack_pointer(regs);
+		tsk->umcg_worker = worker;
+		clear_task_syscall_work(tsk, SYSCALL_UMCG);
+		/* implicitly clears PF_UMCG_WORKER with the early exit */
+		return;
+
+	case -ETIMEDOUT:
+		/* must be __NR_umcg_wait */
+		regs_set_return_value(regs, ret);
 		break;
 
 	default:
@@ -566,7 +615,7 @@ void umcg_notify_resume(struct pt_regs *
 
 done:
 	if (worker)
-		current->flags |= PF_UMCG_WORKER;
+		tsk->flags |= PF_UMCG_WORKER;
 }
 
 /**
@@ -755,16 +804,7 @@ SYSCALL_DEFINE2(umcg_wait, u32, flags, u
 
 	umcg_unpin_pages();
 
-	ret = umcg_wait(timo);
-	switch (ret) {
-	case 0:		/* all done */
-	case -EINTR:	/* umcg_notify_resume() will continue the wait */
-		ret = 0;
-		break;
-
-	default:
-		goto unblock;
-	}
+	/* notify-resume will wait */
 out:
 	if (worker)
 		tsk->flags |= PF_UMCG_WORKER;
@@ -831,7 +871,7 @@ static int umcg_register(struct umcg_tas
 		set_syscall_work(SYSCALL_UMCG);		/* hook syscall */
 		set_thread_flag(TIF_UMCG);		/* hook return-to-user */
 
-		umcg_unblock_and_wait();
+		umcg_unblock();
 
 	} else {
 		if ((ut.state & (UMCG_TASK_MASK | UMCG_TF_MASK)) != UMCG_TASK_RUNNING)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ