[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220121151845.GB22849@worktop.programming.kicks-ass.net>
Date: Fri, 21 Jan 2022 16:18:45 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: mingo@...hat.com, tglx@...utronix.de, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
bristot@...hat.com
Cc: linux-kernel@...r.kernel.org, linux-mm@...ck.org,
linux-api@...r.kernel.org, x86@...nel.org, pjt@...gle.com,
posk@...gle.com, avagin@...gle.com, jannh@...gle.com,
tdelisle@...terloo.ca, mark.rutland@....com, posk@...k.io
Subject: Re: [RFC][PATCH v2 5/5] sched: User Mode Concurrency Groups
On Fri, Jan 21, 2022 at 12:47:58PM +0100, Peter Zijlstra wrote:
> On Thu, Jan 20, 2022 at 04:55:22PM +0100, Peter Zijlstra wrote:
>
> > +SYSCALL_DEFINE2(umcg_wait, u32, flags, u64, timo)
> > +{
> > + struct task_struct *tsk = current;
> > + struct umcg_task __user *self = READ_ONCE(tsk->umcg_task);
> > + bool worker = tsk->flags & PF_UMCG_WORKER;
> > + int ret;
> > +
> > + if (!self || flags)
> > + return -EINVAL;
> > +
> > + if (worker) {
> > + tsk->flags &= ~PF_UMCG_WORKER;
> > + if (timo)
> > + return -ERANGE;
> > + }
> > +
> > + /* see umcg_sys_{enter,exit}() syscall exceptions */
> > + ret = umcg_pin_pages();
> > + if (ret)
> > + goto unblock;
> > +
> > + /*
> > + * Clear UMCG_TF_COND_WAIT *and* check state == RUNNABLE.
> > + */
> > + ret = umcg_update_state(tsk, self, UMCG_TASK_RUNNABLE, UMCG_TASK_RUNNABLE);
> > + if (ret)
> > + goto unpin;
> > +
> > + ret = umcg_wake_next(tsk, self);
> > + if (ret)
> > + goto unpin;
> > +
> > + if (worker) {
> > + /*
> > + * If this fails it is possible ::next_tid is already running
> > + * while this task is not going to block. This violates our
> > + * constraints.
> > + *
> > + * That said, pretty much the only way to make this fail is by
> > + * force munmap()'ing things. In which case one is most welcome
> > + * to the pieces.
> > + */
> > + ret = umcg_enqueue_and_wake(tsk);
> > + if (ret)
> > + goto unpin;
> > + }
> > +
> > + umcg_unpin_pages();
> > +
> > + ret = umcg_wait(timo);
> > + switch (ret) {
> > + case 0: /* all done */
> > + case -EINTR: /* umcg_notify_resume() will continue the wait */
>
> So I was playing with the whole worker timeout thing last night and
> realized this is broken. If we get a signal while we have a timeout, the
> timeout gets lost.
>
> I think the easiest solution is to have umcg_notify_resume() also resume
> the timeout, but the first pass of that was yuck, so I need to try
> again.
Something like this; it is still yuck, though. I also still need to write a
test for this.
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1300,12 +1300,14 @@ struct task_struct {
clockid_t umcg_clock;
struct umcg_task __user *umcg_task;
- /* setup by umcg_pin_enter() */
+ /* setup by umcg_pin_pages() */
struct page *umcg_page;
struct task_struct *umcg_server;
struct umcg_task __user *umcg_server_task;
struct page *umcg_server_page;
+
+ u64 umcg_timeout;
#endif
struct tlbflush_unmap_batch tlb_ubc;
--- a/kernel/sched/umcg.c
+++ b/kernel/sched/umcg.c
@@ -232,6 +232,8 @@ static int umcg_update_state(struct task
/* Called from syscall enter path and exceptions that can schedule */
void umcg_sys_enter(struct pt_regs *regs, long syscall)
{
+ current->umcg_timeout = 0;
+
/* avoid recursion vs our own syscalls */
if (syscall == __NR_umcg_wait ||
syscall == __NR_umcg_ctl)
@@ -519,6 +521,7 @@ void umcg_notify_resume(struct pt_regs *
struct umcg_task __user *self = tsk->umcg_task;
bool worker = tsk->flags & PF_UMCG_WORKER;
u32 state;
+ int ret;
/* avoid recursion vs schedule() */
if (worker)
@@ -554,12 +557,17 @@ void umcg_notify_resume(struct pt_regs *
umcg_unpin_pages();
}
- switch (umcg_wait(0)) {
+ ret = umcg_wait(tsk->umcg_timeout);
+ switch (ret) {
case 0:
case -EINTR:
/* we will resume the wait after the signal */
break;
+ case -ETIMEDOUT:
+ regs_set_return_value(regs, ret);
+ break;
+
default:
UMCG_DIE("wait");
}
@@ -759,6 +767,7 @@ SYSCALL_DEFINE2(umcg_wait, u32, flags, u
switch (ret) {
case 0: /* all done */
case -EINTR: /* umcg_notify_resume() will continue the wait */
+ tsk->umcg_timeout = timo;
ret = 0;
break;
Powered by blists - more mailing lists