Message-ID: <20250106171402.GC22191@noisy.programming.kicks-ass.net>
Date: Mon, 6 Jan 2025 18:14:02 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Doug Smythies <dsmythies@...us.net>
Cc: linux-kernel@...r.kernel.org, vincent.guittot@...aro.org
Subject: Re: [REGRESSION] Re: [PATCH 00/24] Complete EEVDF
On Mon, Jan 06, 2025 at 06:04:55PM +0100, Peter Zijlstra wrote:
> On Mon, Jan 06, 2025 at 05:59:32PM +0100, Peter Zijlstra wrote:
> > On Mon, Jan 06, 2025 at 07:01:34AM -0800, Doug Smythies wrote:
> >
> > > > What is the easiest 100% load you're seeing this with?
> > >
> > > Lately, and specifically to be able to tell others, I have been using:
> > >
> > > yes > /dev/null &
> > >
> > > On my Intel i5-10600K, with 6 cores and 2 threads per core, 12 CPUs,
> > > I run 12 of those workloads.
> >
> > On my headless ivb-ep, 2 sockets, 10 cores each and 2 threads per core, I
> > do:
> >
> > for ((i=0; i<40; i++)) ; do yes > /dev/null & done
> > tools/power/x86/turbostat/turbostat --quiet --Summary --show Busy%,Bzy_MHz,IRQ,PkgWatt,PkgTmp,TSC_MHz --interval 1
> >
> > But so far, nada :-( I've tried with full preemption and voluntary,
> > HZ=1000.
> >
>
> And just as I send this, I see these happen:
>
> 100.00 3100 2793 40302 71 195.22
> 100.00 3100 2618 40459 72 183.58
> 100.00 3100 2993 46215 71 209.21
> 100.00 3100 2789 40467 71 195.19
> 99.92 3100 2798 40589 71 195.76
> 100.00 3100 2793 40397 72 195.46
> ...
> 100.00 3100 2844 41906 71 199.43
> 100.00 3100 2779 40468 71 194.51
> 99.96 3100 2320 40933 71 163.23
> 100.00 3100 3529 61823 72 245.70
> 100.00 3100 2793 40493 72 195.45
> 100.00 3100 2793 40462 72 195.56
>
> They look like funny little blips. Nowhere near as bad as what you saw,
> though.
Anyway, given you've confirmed disabling DELAY_DEQUEUE fixes things,
could you perhaps try the below hackery for me? It's a bit of a wild
guess, but throw stuff at the wall, see what sticks, etc.
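
(For reference, DELAY_DEQUEUE can be flipped at run time through the
scheduler features debugfs file; a minimal sketch, assuming
CONFIG_SCHED_DEBUG and debugfs mounted in the usual place:

  # disable delayed dequeue while reproducing
  echo NO_DELAY_DEQUEUE > /sys/kernel/debug/sched/features
  # put it back when done
  echo DELAY_DEQUEUE > /sys/kernel/debug/sched/features
)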
---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84902936a620..fa4b9891f93a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3019,7 +3019,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 	} else {
 
 		if (!is_migration_disabled(p)) {
-			if (task_on_rq_queued(p))
+			if (task_on_rq_queued(p) && !p->se.sched_delayed)
 				rq = move_queued_task(rq, rf, p, dest_cpu);
 
 			if (!pending->stop_pending) {
@@ -3776,28 +3776,30 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
  */
 static int ttwu_runnable(struct task_struct *p, int wake_flags)
 {
-	struct rq_flags rf;
-	struct rq *rq;
-	int ret = 0;
+	CLASS(__task_rq_lock, rq_guard)(p);
+	struct rq *rq = rq_guard.rq;
 
-	rq = __task_rq_lock(p, &rf);
-	if (task_on_rq_queued(p)) {
-		update_rq_clock(rq);
-		if (p->se.sched_delayed)
-			enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
-		if (!task_on_cpu(rq, p)) {
-			/*
-			 * When on_rq && !on_cpu the task is preempted, see if
-			 * it should preempt the task that is current now.
-			 */
-			wakeup_preempt(rq, p, wake_flags);
+	if (!task_on_rq_queued(p))
+		return 0;
+
+	update_rq_clock(rq);
+	if (p->se.sched_delayed) {
+		int queue_flags = ENQUEUE_NOCLOCK | ENQUEUE_DELAYED;
+		if (!is_cpu_allowed(p, cpu_of(rq))) {
+			dequeue_task(rq, p, DEQUEUE_SLEEP | queue_flags);
+			return 0;
 		}
-		ttwu_do_wakeup(p);
-		ret = 1;
+		enqueue_task(rq, p, queue_flags);
 	}
-	__task_rq_unlock(rq, &rf);
-
-	return ret;
+	if (!task_on_cpu(rq, p)) {
+		/*
+		 * When on_rq && !on_cpu the task is preempted, see if
+		 * it should preempt the task that is current now.
+		 */
+		wakeup_preempt(rq, p, wake_flags);
+	}
+	ttwu_do_wakeup(p);
+	return 1;
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 65fa64845d9f..b4c1f6c06c18 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1793,6 +1793,11 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
 	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }
 
+DEFINE_LOCK_GUARD_1(__task_rq_lock, struct task_struct,
+		    _T->rq = __task_rq_lock(_T->lock, &_T->rf),
+		    __task_rq_unlock(_T->rq, &_T->rf),
+		    struct rq *rq; struct rq_flags rf)
+
 DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct,
 		    _T->rq = task_rq_lock(_T->lock, &_T->rf),
 		    task_rq_unlock(_T->rq, _T->lock, &_T->rf),
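
For completeness, a rough sketch of one way to apply and re-test the
above (assuming a git checkout of the affected kernel and the usual
build setup; the patch file name below is just a placeholder):

  # save the diff above as e.g. delay-dequeue-hack.patch, then:
  git apply delay-dequeue-hack.patch
  make -j$(nproc) && make modules_install install
  # after booting the patched kernel, re-run the load and watch turbostat:
  for ((i=0; i<$(nproc); i++)) ; do yes > /dev/null & done
  tools/power/x86/turbostat/turbostat --quiet --Summary --show Busy%,Bzy_MHz,IRQ,PkgWatt,PkgTmp,TSC_MHz --interval 1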